Re: [PATCH v2] drm/buddy: Fix the warn on's during force merge

2024-05-17 Thread Paneer Selvam, Arunpravin

Hi Dave,
Please help pull this patch into drm-next. It fixes the unnecessary
warnings seen in memory pressure situations.

Thanks for the help.

Regards,
Arun.

On 5/17/2024 8:03 PM, Arunpravin Paneer Selvam wrote:

Move the fallback and block incompatible checks
above, so that we don't unnecessarily split the blocks
and leave them unmerged. This resolves the unnecessary
warn on's thrown during the force_merge call.

v2: (Matthew)
   - Move the fallback and block incompatible checks above
     the contains check.

Signed-off-by: Arunpravin Paneer Selvam 
Reviewed-by: Matthew Auld 
Fixes: 96950929eb23 ("drm/buddy: Implement tracking clear page feature")
Link: 
https://patchwork.kernel.org/project/dri-devel/patch/20240517135015.17565-1-arunpravin.paneersel...@amd.com/
---
  drivers/gpu/drm/drm_buddy.c | 6 +++---
  1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/drm_buddy.c b/drivers/gpu/drm/drm_buddy.c
index 1daf778cf6fa..94f8c34fc293 100644
--- a/drivers/gpu/drm/drm_buddy.c
+++ b/drivers/gpu/drm/drm_buddy.c
@@ -524,11 +524,11 @@ __alloc_range_bias(struct drm_buddy *mm,
continue;
}
  
+		if (!fallback && block_incompatible(block, flags))
+			continue;
+
if (contains(start, end, block_start, block_end) &&
order == drm_buddy_block_order(block)) {
-   if (!fallback && block_incompatible(block, flags))
-   continue;
-
/*
 * Find the free block within the range.
 */




Re: 6.10/regression/bisected - commit a68c7eaa7a8f cause *ERROR* Trying to clear memory with ring turned off in amdgpu_fill_buffer.

2024-05-17 Thread Paneer Selvam, Arunpravin




On 5/17/2024 6:56 PM, Mikhail Gavrilov wrote:

Hi,
I am continuing to test unstable kernels.
Yesterday the new kernel 20240516git3c999d1ae3c7 arrived in Fedora
Rawhide, and I spotted a new error message in the kernel log:
[   13.676117] [drm] Seamless boot condition check passed
[   13.676996] [drm:amdgpu_fill_buffer [amdgpu]] *ERROR* Trying to
clear memory with ring turned off.
[   13.677404] [ cut here ]
[   13.677422] WARNING: CPU: 24 PID: 746 at
drivers/gpu/drm/amd/amdgpu/amdgpu_object.c:1382
amdgpu_bo_release_notify+0x4ed/0x5e0 [amdgpu]
[   13.677711] Modules linked in: amdgpu(+) crct10dif_pclmul
crc32_pclmul crc32c_intel polyval_clmulni polyval_generic amdxcp
i2c_algo_bit drm_ttm_helper ttm drm_exec gpu_sched ghash_clmulni_intel
drm_suballoc_helper sha512_ssse3 drm_buddy sha256_ssse3
drm_display_helper ccp sha1_ssse3 r8169 nvme sp5100_tco realtek
nvme_core nvme_auth video wmi ip6_tables ip_tables fuse
[   13.677749] CPU: 24 PID: 746 Comm: (udev-worker) Tainted: G
WL---  ---
6.10.0-0.rc0.20240516git3c999d1ae3c7.5.fc41.x86_64+debug #1
[   13.677752] Hardware name: ASRock B650I Lightning WiFi/B650I
Lightning WiFi, BIOS 2.10 03/20/2024
[   13.677753] RIP: 0010:amdgpu_bo_release_notify+0x4ed/0x5e0 [amdgpu]
[   13.678005] Code: ff ff ff ff ff ff 7f 31 f6 4c 89 ef e8 2c 96 9c
e0 e9 45 ff ff ff 4c 89 ef e8 8f 7d 9c e0 e9 71 ff ff ff 0f 0b e9 f9
fb ff ff <0f> 0b e9 63 ff ff ff be 03 00 00 00 4c 89 ef e8 2f 06 f9 df
e9 51
[   13.678007] RSP: 0018:c9000469ea08 EFLAGS: 00010282
[   13.678010] RAX: ffea RBX: 888270d1c048 RCX: 
[   13.678011] RDX: ffea RSI: f520008d3d1a RDI: f520008d3cde
[   13.678013] RBP: 1920008d3d42 R08: 0001 R09: 888270d1c298
[   13.678014] R10: c9000469e51f R11:  R12: 888257047b80
[   13.678015] R13: 888273610200 R14: 888270d1c000 R15: 888270d1c180
[   13.678017] FS:  7f22a16fce00() GS:888dfde0()
knlGS:
[   13.678018] CS:  0010 DS:  ES:  CR0: 80050033
[   13.678020] CR2: 7f229f349000 CR3: 0001453d4000 CR4: 00f50ef0
[   13.678021] PKRU: 5554
[   13.678022] Call Trace:
[   13.678023]  
[   13.678025]  ? __warn.cold+0x5b/0x1af
[   13.678030]  ? amdgpu_bo_release_notify+0x4ed/0x5e0 [amdgpu]
[   13.686810]  ? report_bug+0x1fc/0x3d0
[   13.686817]  ? handle_bug+0x3c/0x80
[   13.686821]  ? exc_invalid_op+0x17/0x40
[   13.686824]  ? asm_exc_invalid_op+0x1a/0x20
[   13.686833]  ? amdgpu_bo_release_notify+0x4ed/0x5e0 [amdgpu]
[   13.694852]  ? amdgpu_bo_release_notify+0x37e/0x5e0 [amdgpu]
[   13.695094]  ? __pfx_amdgpu_bo_release_notify+0x10/0x10 [amdgpu]
[   13.695332]  ? __pfx_lock_release+0x10/0x10
[   13.695342]  ttm_bo_release+0x266/0xa50 [ttm]
[   13.695351]  ? __pfx_ttm_bo_release+0x10/0x10 [ttm]
[   13.695363]  amdgpu_bo_free_kernel+0x269/0x3c0 [amdgpu]
[   13.695613]  dm_helpers_free_gpu_mem+0xbf/0x250 [amdgpu]
[   13.695925]  dcn315_clk_mgr_construct+0x630/0x3a20 [amdgpu]
[   13.696235]  ? __pfx_dcn315_clk_mgr_construct+0x10/0x10 [amdgpu]
[   13.696535]  dc_clk_mgr_create+0x34e/0xf10 [amdgpu]
[   13.696821]  ? dc_create_resource_pool+0x67e/0x810 [amdgpu]
[   13.697103]  dc_create+0x939/0x1d20 [amdgpu]
[   13.697374]  ? _printk+0xcc/0x102
[   13.697378]  ? __pfx__printk+0x10/0x10
[   13.697382]  ? __pfx_dc_create+0x10/0x10 [amdgpu]
[   13.697658]  ? dmi_matches+0xe6/0x350
[   13.697664]  amdgpu_dm_init.isra.0+0x8c5/0x5970 [amdgpu]
[   13.697958]  ? __pfx_amdgpu_dm_init.isra.0+0x10/0x10 [amdgpu]
[   13.698237]  ? __pfx_dev_printk_emit+0x10/0x10
[   13.698243]  ? rcu_is_watching+0x12/0xc0
[   13.698246]  ? amdgpu_device_wreg+0x15f/0x1b0 [amdgpu]
[   13.698487]  ? smu_hw_init.cold+0x110/0x1e2 [amdgpu]
[   13.727854]  ? __pfx_smu_hw_init+0x10/0x10 [amdgpu]
[   13.728138]  dm_hw_init+0x12/0x30 [amdgpu]
[   13.728417]  amdgpu_device_init.cold+0x4157/0x5a27 [amdgpu]
[   13.728714]  ? __pfx_pci_bus_read_config_word+0x10/0x10
[   13.728718]  ? __pfx_amdgpu_device_init+0x10/0x10 [amdgpu]
[   13.728958]  ? do_pci_enable_device+0x200/0x2a0
[   13.728961]  ? __pfx_do_pci_enable_device+0x10/0x10
[   13.728965]  ? _raw_spin_unlock_irqrestore+0x66/0x80
[   13.728968]  ? lockdep_hardirqs_on+0x7c/0x100
[   13.728971]  ? __kasan_check_byte+0x13/0x50
[   13.728977]  amdgpu_driver_load_kms+0x19/0xa0 [amdgpu]
[   13.729209]  amdgpu_pci_probe+0x310/0xc20 [amdgpu]
[   13.729441]  ? _raw_spin_unlock_irqrestore+0x4f/0x80
[   13.729444]  ? __pfx_amdgpu_pci_probe+0x10/0x10 [amdgpu]
[   13.729683]  local_pci_probe+0xdc/0x180
[   13.729687]  pci_device_probe+0x23c/0x810
[   13.729689]  ? kernfs_add_one+0x3ab/0x4a0
[   13.729692]  ? kernfs_new_node+0x13d/0x240
[   13.729694]  ? __pfx_pci_device_probe+0x10/0x10
[   13.729698]  ? kernfs_create_link+0x16e/0x240
[   13.729701]  ? kernfs_put+0x1c/0x40
[   13.729704]  ? sysfs_do_create_link_sd+0x8e/0x100
[   13.729709]  really_probe+0x1e0/0x8a0
[   13.729714]  

Re: [PATCH v2] drm/buddy: Fix the warn on's during force merge

2024-05-17 Thread Paneer Selvam, Arunpravin




On 5/17/2024 7:30 PM, Matthew Auld wrote:

On 17/05/2024 14:50, Arunpravin Paneer Selvam wrote:

Move the fallback and block incompatible checks
above, so that we don't unnecessarily split the blocks
and leave them unmerged. This resolves the unnecessary
warn on's thrown during the force_merge call.

v2: (Matthew)
   - Move the fallback and block incompatible checks above
     the contains check.

Signed-off-by: Arunpravin Paneer Selvam 


Fixes: 96950929eb23 ("drm/buddy: Implement tracking clear page feature")

Reviewed-by: Matthew Auld 

A follow up unit test to catch this edge case would be lovely.

Yes, I will follow up on this.

Thanks,
Arun.



---
  drivers/gpu/drm/drm_buddy.c | 6 +++---
  1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/drm_buddy.c b/drivers/gpu/drm/drm_buddy.c
index 1daf778cf6fa..94f8c34fc293 100644
--- a/drivers/gpu/drm/drm_buddy.c
+++ b/drivers/gpu/drm/drm_buddy.c
@@ -524,11 +524,11 @@ __alloc_range_bias(struct drm_buddy *mm,
  continue;
  }
  +    if (!fallback && block_incompatible(block, flags))
+    continue;
+
  if (contains(start, end, block_start, block_end) &&
  order == drm_buddy_block_order(block)) {
-    if (!fallback && block_incompatible(block, flags))
-    continue;
-
  /*
   * Find the free block within the range.
   */
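
As a side note for readers following the reorder: here is a small
self-contained userspace model (not the drm_buddy code; block
bookkeeping is elided) of why the ordering matters. With the
incompatibility check inside the contains branch, the DFS splits an
incompatible block all the way down and then rejects every leaf,
leaving the stray splits behind for force_merge to warn about:

#include <stdio.h>

static int splits;

/* Toy DFS over one incompatible block of the given order. */
static void dfs(int order, int check_before_contains)
{
        if (check_before_contains)
                return;                 /* rejected before any split */
        if (order == 0)
                return;                 /* leaf: rejected after splitting */
        splits++;                       /* split into two halves */
        dfs(order - 1, check_before_contains);
        dfs(order - 1, check_before_contains);
}

int main(void)
{
        splits = 0;
        dfs(3, 0);
        printf("check inside contains: %d stray splits\n", splits); /* 7 */
        splits = 0;
        dfs(3, 1);
        printf("check moved above:     %d stray splits\n", splits); /* 0 */
        return 0;
}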




Re: [PATCH v2] drm/buddy: Fix the warn on's during force merge

2024-05-17 Thread Paneer Selvam, Arunpravin

Hi Matthew,
This fixes the problem.

Regards,
Arun.

On 5/17/2024 7:20 PM, Arunpravin Paneer Selvam wrote:

Move the fallback and block incompatible checks
above, so that we don't unnecessarily split the blocks
and leave them unmerged. This resolves the unnecessary
warn on's thrown during the force_merge call.

v2: (Matthew)
   - Move the fallback and block incompatible checks above
     the contains check.

Signed-off-by: Arunpravin Paneer Selvam 
Fixes: 96950929eb23 ("drm/buddy: Implement tracking clear page feature")
---
  drivers/gpu/drm/drm_buddy.c | 6 +++---
  1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/drm_buddy.c b/drivers/gpu/drm/drm_buddy.c
index 1daf778cf6fa..94f8c34fc293 100644
--- a/drivers/gpu/drm/drm_buddy.c
+++ b/drivers/gpu/drm/drm_buddy.c
@@ -524,11 +524,11 @@ __alloc_range_bias(struct drm_buddy *mm,
continue;
}
  
+		if (!fallback && block_incompatible(block, flags))
+			continue;
+
if (contains(start, end, block_start, block_end) &&
order == drm_buddy_block_order(block)) {
-   if (!fallback && block_incompatible(block, flags))
-   continue;
-
/*
 * Find the free block within the range.
 */




Re: [PATCH] drm/buddy: Merge back blocks in bias range function

2024-05-17 Thread Paneer Selvam, Arunpravin

Hi Matthew,
Could you help review this patch quickly?

Hi Dave
This patch just fixes the unnecessary warn on's triggered during
the force_merge call.

Regards,
Arun.

On 5/17/2024 6:08 PM, Arunpravin Paneer Selvam wrote:

In bias range allocation, when we don't find the required
blocks (i.e. on returning -ENOSPC), we should merge back the
split blocks. Otherwise, during force_merge we are flooded with
warn on's because a block and its buddy are in the same clear state
(dirty or clear).

Hence, rename force_merge to merge_blocks and pass force_merge
as a bool function parameter. Based on the requirement, in any
normal situation we can call merge_blocks passing the force_merge
variable as false, and in any memory crunch situation we can call
merge_blocks passing force_merge as true. This resolves the
unnecessary warn on's thrown during the force_merge call.

Signed-off-by: Arunpravin Paneer Selvam 
Fixes: 96950929eb23 ("drm/buddy: Implement tracking clear page feature")
---
  drivers/gpu/drm/drm_buddy.c | 32 ++--
  1 file changed, 22 insertions(+), 10 deletions(-)

diff --git a/drivers/gpu/drm/drm_buddy.c b/drivers/gpu/drm/drm_buddy.c
index 1daf778cf6fa..111f602f1359 100644
--- a/drivers/gpu/drm/drm_buddy.c
+++ b/drivers/gpu/drm/drm_buddy.c
@@ -161,10 +161,11 @@ static unsigned int __drm_buddy_free(struct drm_buddy *mm,
return order;
  }
  
-static int __force_merge(struct drm_buddy *mm,
-			 u64 start,
-			 u64 end,
-			 unsigned int min_order)
+static int __merge_blocks(struct drm_buddy *mm,
+ u64 start,
+ u64 end,
+ unsigned int min_order,
+ bool force_merge)
  {
unsigned int order;
int i;
@@ -195,8 +196,9 @@ static int __force_merge(struct drm_buddy *mm,
if (!drm_buddy_block_is_free(buddy))
continue;
  
-			WARN_ON(drm_buddy_block_is_clear(block) ==
-				drm_buddy_block_is_clear(buddy));
+   if (force_merge)
+   WARN_ON(drm_buddy_block_is_clear(block) ==
+   drm_buddy_block_is_clear(buddy));
  
			/*
			 * If the prev block is same as buddy, don't access the
@@ -210,7 +212,7 @@ static int __force_merge(struct drm_buddy *mm,
if (drm_buddy_block_is_clear(block))
				mm->clear_avail -= drm_buddy_block_size(mm, block);
  
-			order = __drm_buddy_free(mm, block, true);
+			order = __drm_buddy_free(mm, block, force_merge);
if (order >= min_order)
return 0;
}
@@ -332,7 +334,7 @@ void drm_buddy_fini(struct drm_buddy *mm)
  
  	for (i = 0; i < mm->n_roots; ++i) {

order = ilog2(size) - ilog2(mm->chunk_size);
-   __force_merge(mm, 0, size, order);
+   __merge_blocks(mm, 0, size, order, true);
  
  		WARN_ON(!drm_buddy_block_is_free(mm->roots[i]));

drm_block_free(mm, mm->roots[i]);
@@ -479,7 +481,7 @@ __alloc_range_bias(struct drm_buddy *mm,
   unsigned long flags,
   bool fallback)
  {
-   u64 req_size = mm->chunk_size << order;
+   u64 size, root_size, req_size = mm->chunk_size << order;
struct drm_buddy_block *block;
struct drm_buddy_block *buddy;
LIST_HEAD(dfs);
@@ -487,6 +489,7 @@ __alloc_range_bias(struct drm_buddy *mm,
int i;
  
  	end = end - 1;

+   size = mm->size;
  
  	for (i = 0; i < mm->n_roots; ++i)

		list_add_tail(&mm->roots[i]->tmp_link, &dfs);
@@ -548,6 +551,15 @@ __alloc_range_bias(struct drm_buddy *mm,
		list_add(&block->left->tmp_link, &dfs);
} while (1);
  
+	/* Merge back the split blocks */

+   for (i = 0; i < mm->n_roots; ++i) {
+   order = ilog2(size) - ilog2(mm->chunk_size);
+   __merge_blocks(mm, start, end, order, false);
+
+   root_size = mm->chunk_size << order;
+   size -= root_size;
+   }
+
return ERR_PTR(-ENOSPC);
  
  err_undo:

@@ -1026,7 +1038,7 @@ int drm_buddy_alloc_blocks(struct drm_buddy *mm,
if (order-- == min_order) {
/* Try allocation through force merge method */
if (mm->clear_avail &&
-   !__force_merge(mm, start, end, min_order)) {
+		    !__merge_blocks(mm, start, end, min_order, true)) {
				block = __drm_buddy_alloc_blocks(mm, start,
								 end,
								 min_order,
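
For background on the merge-back loop added above: in a binary buddy
allocator, a block's buddy is its sibling at the same order, found by
flipping the size bit of the offset. A minimal standalone sketch
(plain userspace C, not the drm_buddy code):

#include <stdio.h>
#include <stdint.h>

/* Buddy of the block at byte offset `off` with size `size` in bytes
 * (size must be a power of two and off must be size-aligned).
 */
static uint64_t buddy_of(uint64_t off, uint64_t size)
{
        return off ^ size;
}

int main(void)
{
        /* Two adjacent 4 KiB chunks: each is the other's buddy. */
        printf("buddy of 0x0000 -> 0x%04llx\n",
               (unsigned long long)buddy_of(0x0000, 0x1000));
        printf("buddy of 0x1000 -> 0x%04llx\n",
               (unsigned long long)buddy_of(0x1000, 0x1000));
        /* A pair can merge only when both halves are free at the same
         * order; if the bias-range DFS bails out with -ENOSPC without
         * merging back, such free siblings stay split until force_merge
         * runs and complains.
         */
        return 0;
}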




Re: [PATCH v2 2/2] drm/tests: Add a unit test for range bias allocation

2024-05-13 Thread Paneer Selvam, Arunpravin

Hi Matthew,

On 5/13/2024 1:49 PM, Matthew Auld wrote:

On 12/05/2024 08:59, Arunpravin Paneer Selvam wrote:

Allocate cleared blocks in the bias range when the DRM
buddy's clear avail is zero. This will validate the bias
range allocation in scenarios like system boot when no
cleared blocks are available and exercise the fallback
path too. The resulting blocks should always be dirty.

Signed-off-by: Arunpravin Paneer Selvam 


---
  drivers/gpu/drm/tests/drm_buddy_test.c | 35 ++
  1 file changed, 35 insertions(+)

diff --git a/drivers/gpu/drm/tests/drm_buddy_test.c 
b/drivers/gpu/drm/tests/drm_buddy_test.c

index e3b50e240d36..a194f271bc55 100644
--- a/drivers/gpu/drm/tests/drm_buddy_test.c
+++ b/drivers/gpu/drm/tests/drm_buddy_test.c
@@ -26,6 +26,8 @@ static void drm_test_buddy_alloc_range_bias(struct 
kunit *test)

  u32 mm_size, ps, bias_size, bias_start, bias_end, bias_rem;
  DRM_RND_STATE(prng, random_seed);
  unsigned int i, count, *order;
+    struct drm_buddy_block *block;
+    unsigned long flags;
  struct drm_buddy mm;
  LIST_HEAD(allocated);
  @@ -222,6 +224,39 @@ static void 
drm_test_buddy_alloc_range_bias(struct kunit *test)

    drm_buddy_free_list(&mm, &allocated, 0);
    drm_buddy_fini(&mm);
+
+    /*
+     * Allocate cleared blocks in the bias range when the DRM buddy's
+     * clear avail is zero. This will validate the bias range allocation
+     * in scenarios like system boot when no cleared blocks are available
+     * and exercise the fallback path too. The resulting blocks should
+     * always be dirty.
+     */
+
+    KUNIT_ASSERT_FALSE_MSG(test, drm_buddy_init(&mm, mm_size, ps),
+   "buddy_init failed\n");
+    mm.clear_avail = 0;


Should already be zero, right? Maybe make this an assert instead?
No, since mm is declared as a local variable in the test case,
mm.clear_avail is not zero.



+
+    bias_start = round_up(prandom_u32_state(&prng) % (mm_size - ps), ps);
+    bias_end = round_up(bias_start + prandom_u32_state(&prng) % (mm_size - bias_start), ps);
+    bias_end = max(bias_end, bias_start + ps);
+    bias_rem = bias_end - bias_start;
+
+    flags = DRM_BUDDY_CLEAR_ALLOCATION | DRM_BUDDY_RANGE_ALLOCATION;
+    u32 size = max(round_up(prandom_u32_state(&prng) % bias_rem, ps), ps);


The u32 declaration should be moved above?

Sure.

Thanks,
Arun.


Otherwise,
Reviewed-by: Matthew Auld 


+
+    KUNIT_ASSERT_FALSE_MSG(test,
+                           drm_buddy_alloc_blocks(&mm, bias_start,
+                                                  bias_end, size, ps,
+                                                  &allocated,
+                                                  flags),
+                           "buddy_alloc failed with bias(%x-%x), size=%u, ps=%u\n",
+                           bias_start, bias_end, size, ps);
+
+    list_for_each_entry(block, &allocated, link)
+    KUNIT_EXPECT_EQ(test, drm_buddy_block_is_clear(block), false);
+
+    drm_buddy_free_list(&mm, &allocated, 0);
+    drm_buddy_fini(&mm);
  }
    static void drm_test_buddy_alloc_clear(struct kunit *test)
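
The bias-range arithmetic used by the test can be checked standalone.
A sketch with fixed stand-in values for the prandom draws (the
mm_size, ps and round_up definitions here are assumptions of this
sketch, not taken from the test file):

#include <stdio.h>
#include <stdint.h>

/* Generic round-up; the kernel macro requires a power-of-two divisor,
 * which ps (4096) satisfies.
 */
#define round_up(x, y) ((((x) + (y) - 1) / (y)) * (y))

int main(void)
{
        uint32_t mm_size = 1U << 20, ps = 4096;  /* assumed sizes */
        uint32_t r1 = 123456, r2 = 7890;         /* stand-ins for prandom_u32_state(&prng) */

        uint32_t bias_start = round_up(r1 % (mm_size - ps), ps);
        uint32_t bias_end = round_up(bias_start + r2 % (mm_size - bias_start), ps);

        if (bias_end < bias_start + ps)          /* max(bias_end, bias_start + ps) */
                bias_end = bias_start + ps;

        /* Both ends stay page-aligned and the range is >= one page. */
        printf("bias range [%u, %u), rem = %u\n",
               bias_start, bias_end, bias_end - bias_start);
        return 0;
}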




Re: [PATCH] drm/buddy: Fix the range bias clear memory allocation issue

2024-05-12 Thread Paneer Selvam, Arunpravin

Hi Daniel,

On 5/8/2024 2:11 PM, Daniel Vetter wrote:

On Wed, May 08, 2024 at 12:27:20PM +0530, Arunpravin Paneer Selvam wrote:

Problem statement: At system boot time, when an application requests a
bulk volume of cleared range-bias memory while clear_avail is zero, we
don't fall back into the normal allocation method, because an
unnecessary clear_avail check prevents the fallback. This leads to an
fb allocation failure, after which the system goes into an
unresponsive state.

Solution: Remove the unnecessary clear_avail check in the range bias
allocation function.

Signed-off-by: Arunpravin Paneer Selvam 
Fixes: 96950929eb23 ("drm/buddy: Implement tracking clear page feature")
Reviewed-by: Matthew Auld 
---
  drivers/gpu/drm/drm_buddy.c | 2 +-
  1 file changed, 1 insertion(+), 1 deletion(-)

Can you please also add a kunit test case to exercise this corner case and
make sure it stays fixed?

I have sent the v2 along with a kunit test case for this corner case.

Thanks,
Arun.


Thanks, Sima

diff --git a/drivers/gpu/drm/drm_buddy.c b/drivers/gpu/drm/drm_buddy.c
index 284ebae71cc4..831929ac95eb 100644
--- a/drivers/gpu/drm/drm_buddy.c
+++ b/drivers/gpu/drm/drm_buddy.c
@@ -574,7 +574,7 @@ __drm_buddy_alloc_range_bias(struct drm_buddy *mm,
  
	block = __alloc_range_bias(mm, start, end, order,
				   flags, fallback);
-   if (IS_ERR(block) && mm->clear_avail)
+   if (IS_ERR(block))
return __alloc_range_bias(mm, start, end, order,
  flags, !fallback);
  
--

2.25.1
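
A schematic of the fixed control flow (a paraphrase of
__drm_buddy_alloc_range_bias, not the kernel source): the second pass
with the opposite fallback setting now runs whenever the first pass
fails, regardless of clear_avail.

#include <stdio.h>
#include <errno.h>

/* Hypothetical stand-in for __alloc_range_bias(): succeeds only for
 * dirty (fallback) blocks, modeling a boot-time pool where
 * clear_avail == 0 and no cleared blocks exist yet.
 */
static int alloc_range_bias(int want_clear)
{
        return want_clear ? -ENOSPC : 0;
}

static int alloc_with_fallback(int want_clear, int clear_avail)
{
        int r = alloc_range_bias(want_clear);

        /* Before the fix the retry was gated as `if (r && clear_avail)`,
         * so at boot (clear_avail == 0) the fallback never ran and the
         * allocation failed. After the fix we retry unconditionally
         * with the opposite setting.
         */
        (void)clear_avail;
        if (r)
                r = alloc_range_bias(!want_clear);
        return r;
}

int main(void)
{
        printf("boot-time cleared-range alloc: %s\n",
               alloc_with_fallback(1, 0) ? "FAILS" : "succeeds via fallback");
        return 0;
}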





Re: Splat during driver probe

2024-05-03 Thread Paneer Selvam, Arunpravin

Hi Alex,

yes, this is related to memory clearing changes. This is a known issue.
I am working on a fix.

Thanks,
Arun.

On 5/3/2024 6:46 PM, Alex Deucher wrote:

Possibly related to Arun's new memory clearing changes.  @Arunpravin
can you take a look?

Alex

On Fri, May 3, 2024 at 9:10 AM Tvrtko Ursulin  wrote:


Hi,

I tried asking on #radeon yesterday but no takers. I was wondering if
the below splat is already known or not.

Appears to happen due to amdgpu_bo_release_notify genuinely running
before amdgpu_ttm_set_buffer_funcs_status sets the buffer funcs to
enabled during device probe.

I couldn't easily figure out what changed.

Regards,

Tvrtko

[   11.802030] [drm:amdgpu_fill_buffer [amdgpu]] *ERROR* Trying to clear
memory with ring turned off.
[   11.802519] [ cut here ]
[   11.802530] WARNING: CPU: 5 PID: 435 at
drivers/gpu/drm/amd/amdgpu/amdgpu_object.c:1378
amdgpu_bo_release_notify+0x20c/0x230 [amdgpu]
[   11.802916] Modules linked in: nft_fib_inet nft_fib_ipv4 nft_fib_ipv6
nft_fib nft_reject_inet nf_reject_ipv4 nf_reject_ipv6 nft_reject nft_ct
amdgpu(E+) nft_chain_nat nf_tables ip6table_nat ip6table_mangle
ip6table_raw ip6table_security iptable_nat nf_nat nf_conntrack
nf_defrag_ipv6 nf_defrag_ipv4 iptable_mangle iptable_raw
iptable_security nfnetlink ip6table_filter ip6_tables iptable_filter
cmac algif_hash ramoops algif_skcipher reed_solomon af_alg bnep qrtr
mousedev intel_rapl_msr ath11k_pci(+) intel_rapl_common mhi
snd_acp_sof_mach snd_acp_mach snd_sof_probes ath11k snd_soc_dmic kvm_amd
cdc_mbim qmi_helpers joydev hid_multitouch cdc_wdm snd_sof_amd_vangogh
snd_sof_pci kvm mac80211 snd_hda_codec_hdmi hci_uart crct10dif_pclmul
amdxcp i2c_algo_bit snd_sof_amd_acp btqca drm_ttm_helper libarc4
crc32_pclmul btrtl snd_hda_intel snd_sof_xtensa_dsp btbcm ttm
snd_intel_dspcfg btintel polyval_clmulni snd_sof drm_exec
polyval_generic snd_hda_codec snd_sof_utils gpu_sched cfg80211 bluetooth
snd_pci_acp5x gf128mul cdc_ncm
[   11.803070]  sp5100_tco snd_hwdep ghash_clmulni_intel snd_soc_nau8821
snd_soc_max98388 drm_suballoc_helper sha512_ssse3 cdc_ether
snd_acp_config drm_buddy atkbd snd_soc_core aesni_intel snd_hda_core
video ecdh_generic crypto_simd libps2 usbnet cryptd rapl mii wdat_wdt
vivaldi_fmap pcspkr snd_compress i2c_piix4 snd_pcm drm_display_helper
ccp snd_soc_acpi cdc_acm rfkill wmi ltrf216a 8250_dw i2c_hid_acpi
snd_timer snd i2c_hid industrialio soundcore hid_steam pkcs8_key_parser
crypto_user loop fuse dm_mod ip_tables x_tables overlay ext4 crc16
mbcache jbd2 usbhid vfat fat btrfs blake2b_generic libcrc32c
crc32c_generic xor raid6_pq sdhci_pci cqhci nvme serio_raw sdhci
xhci_pci xhci_pci_renesas crc32c_intel mmc_core nvme_core i8042 serio
spi_amd
[   11.803448] CPU: 5 PID: 435 Comm: (udev-worker) Tainted: GW
E  6.9.0-rc6 #3 dd3fb65c639fd86ff89731c604b1e804996e8989
[   11.803471] Hardware name: Valve Galileo/Galileo, BIOS F7G0107 12/01/2023
[   11.803486] RIP: 0010:amdgpu_bo_release_notify+0x20c/0x230 [amdgpu]
[   11.803857] Code: 0b e9 a4 fe ff ff 48 ba ff ff ff ff ff ff ff 7f 31
f6 4c 89 e7 e8 44 e5 33 e6 eb 8d e8 dd dd 33 e6 eb a7 0f 0b e9 4d fe ff
ff <0f> 0b eb 9c be 03 00 00 00 e8 f6 04 00 e6 eb 90 e8 8f 64 79 e6 66
[   11.803890] RSP: 0018:a77bc1537568 EFLAGS: 00010282
[   11.803904] RAX: ffea RBX: 8e1f0da89048 RCX:

[   11.803919] RDX:  RSI: 0027 RDI:

[   11.803934] RBP: 8e1f6ec0ffb0 R08:  R09:
0003
[   11.803948] R10: a77bc15372f0 R11: 8e223ef7ffe8 R12:
8e1f0da89000
[   11.803963] R13: 8e1f0da89180 R14: 8e1f6ec0ffb0 R15:
8e1f6ec0
[   11.803977] FS:  7fa2b600b200() GS:8e222ee8()
knlGS:
[   11.803994] CS:  0010 DS:  ES:  CR0: 80050033
[   11.804007] CR2: 55cac2aa7080 CR3: 00010f818000 CR4:
00350ef0
[   11.804021] Call Trace:
[   11.804031]  
[   11.804042]  ? __warn+0x8c/0x180
[   11.804057]  ? amdgpu_bo_release_notify+0x20c/0x230 [amdgpu
03b1dca7e09918e3f45efb1acdfc90682f73e4c1]
[   11.804456]  ? report_bug+0x191/0x1c0
[   11.804473]  ? handle_bug+0x3a/0x70
[   11.804485]  ? exc_invalid_op+0x17/0x70
[   11.804497]  ? asm_exc_invalid_op+0x1a/0x20
[   11.804517]  ? amdgpu_bo_release_notify+0x20c/0x230 [amdgpu
03b1dca7e09918e3f45efb1acdfc90682f73e4c1]
[   11.804894]  ? amdgpu_bo_release_notify+0x14a/0x230 [amdgpu
03b1dca7e09918e3f45efb1acdfc90682f73e4c1]
[   11.805277]  ttm_bo_release+0x115/0x330 [ttm
39c917822ce73b2a754d4183af7a0990991c66be]
[   11.805303]  ? srso_return_thunk+0x5/0x5f
[   11.805315]  ? __mutex_unlock_slowpath+0x3a/0x290
[   11.805332]  amdgpu_bo_free_kernel+0xd6/0x130 [amdgpu
03b1dca7e09918e3f45efb1acdfc90682f73e4c1]
[   11.805709]  dm_helpers_free_gpu_mem+0x41/0x80 [amdgpu
03b1dca7e09918e3f45efb1acdfc90682f73e4c1]
[   11.806172]  vg_clk_mgr_construct+0x2c4/0x490 [amdgpu
03b1dca7e09918e3f45efb1acdfc90682f73e4c1]
[   11.806645]  

Re: [PATCH v3] drm/amdgpu: Modify the contiguous flags behaviour

2024-04-25 Thread Paneer Selvam, Arunpravin

Hi Christian,

On 4/24/2024 2:02 PM, Christian König wrote:

On 24.04.24 at 09:13, Arunpravin Paneer Selvam wrote:

Now we have two flags for contiguous VRAM buffer allocation.
If the application requests AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS,
we set the ttm place TTM_PL_FLAG_CONTIGUOUS flag in the
buffer's placement function.

This patch will change the default behaviour of the two flags.

When we set AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS
- This means contiguous is not mandatory.
- we will try to allocate the contiguous buffer. If the
  allocation fails, we fall back to allocating individual pages.

When we set TTM_PL_FLAG_CONTIGUOUS
- This means contiguous allocation is mandatory.
- we are setting this in amdgpu_bo_pin_restricted() before bo validation
  and check this flag in the vram manager file.
- if this is set, we should allocate the buffer pages contiguously.
  If the allocation fails, we return -ENOSPC.

v2:
   - keep the mem_flags and bo->flags check as is (Christian)
   - place the TTM_PL_FLAG_CONTIGUOUS flag setting into the
     amdgpu_bo_pin_restricted function placement range iteration
     loop (Christian)
   - rename find_pages with amdgpu_vram_mgr_calculate_pages_per_block
     (Christian)
   - Keep the kernel BO allocation as is (Christian)
   - If BO pin vram allocation failed, we need to return -ENOSPC as
     RDMA cannot work with scattered VRAM pages (Philip)

v3 (Christian):
   - keep contiguous flag handling outside of pages_per_block
     calculation
   - remove the hacky implementation in contiguous flag error
     handling code

Signed-off-by: Arunpravin Paneer Selvam 


Suggested-by: Christian König 
---
  drivers/gpu/drm/amd/amdgpu/amdgpu_object.c   |  8 +-
  drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c | 83 ++--
  2 files changed, 65 insertions(+), 26 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c

index 492aebc44e51..c594d2a5978e 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
@@ -154,8 +154,10 @@ void amdgpu_bo_placement_from_domain(struct 
amdgpu_bo *abo, u32 domain)

  else
  places[c].flags |= TTM_PL_FLAG_TOPDOWN;
  -    if (flags & AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS)
+    if (abo->tbo.type == ttm_bo_type_kernel &&
+    flags & AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS)
  places[c].flags |= TTM_PL_FLAG_CONTIGUOUS;
+
  c++;
  }
  @@ -965,6 +967,10 @@ int amdgpu_bo_pin_restricted(struct amdgpu_bo 
*bo, u32 domain,

  if (!bo->placements[i].lpfn ||
  (lpfn && lpfn < bo->placements[i].lpfn))
  bo->placements[i].lpfn = lpfn;
+
+    if (bo->flags & AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS &&
+    bo->placements[i].mem_type == TTM_PL_VRAM)
+    bo->placements[i].flags |= TTM_PL_FLAG_CONTIGUOUS;
  }
    r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c

index e494f5bf136a..17c5d9ce9927 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
@@ -88,6 +88,23 @@ static inline u64 
amdgpu_vram_mgr_blocks_size(struct list_head *head)

  return size;
  }
+static inline void amdgpu_vram_mgr_limit_min_block_size(unsigned long pages_per_block,
+    u64 size,
+    u64 *min_block_size,
+    bool contiguous_enabled)
+{
+    if (contiguous_enabled)
+    return;
+
+    /*
+ * if size >= 2MiB, limit the min_block_size to 2MiB
+ * for better TLB usage.
+ */
+    if ((size >= (u64)pages_per_block << PAGE_SHIFT) &&
+    !(size & (((u64)pages_per_block << PAGE_SHIFT) - 1)))
+    *min_block_size = (u64)pages_per_block << PAGE_SHIFT;
+}
+
  /**
   * DOC: mem_info_vram_total
   *
@@ -452,11 +469,12 @@ static int amdgpu_vram_mgr_new(struct 
ttm_resource_manager *man,

  struct amdgpu_device *adev = to_amdgpu_device(mgr);
  struct amdgpu_bo *bo = ttm_to_amdgpu_bo(tbo);
  u64 vis_usage = 0, max_bytes, min_block_size;
+    struct amdgpu_bo *bo = ttm_to_amdgpu_bo(tbo);
  struct amdgpu_vram_mgr_resource *vres;
  u64 size, remaining_size, lpfn, fpfn;
    struct drm_buddy *mm = &mgr->mm;
-    struct drm_buddy_block *block;
  unsigned long pages_per_block;
+    struct drm_buddy_block *block;
  int r;
    lpfn = (u64)place->lpfn << PAGE_SHIFT;
@@ -469,18 +487,14 @@ static int amdgpu_vram_mgr_new(struct 
ttm_resource_manager *man,

  if (tbo->type != ttm_bo_type_kernel)
  max_bytes -= AMDGPU_VM_RESERVED_VRAM;
  -    if (place->flags & TTM_PL_FLAG_CONTIGUOUS) {
-    pages_per_block = ~0ul;
-    } else {
-#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+    if (IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE))
  pages_per_block = HPAGE_PMD_NR;


That won't work like this.

HPAGE_PMD_NR is 
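
As a worked example of the amdgpu_vram_mgr_limit_min_block_size()
helper added in the patch above, here is a standalone userspace
restatement (assuming 4K pages; the MIB macro and main() are
illustrative only): only sizes that are whole multiples of the 2MiB
block get their min_block_size raised.

#include <stdio.h>
#include <stdint.h>

#define PAGE_SHIFT 12                       /* assumed 4K pages */
#define MIB(x) ((uint64_t)(x) << 20)

/* Only lower min_block_size to the 2MiB block size when the request
 * is a whole multiple of it and contiguous allocation is not forced.
 */
static void limit_min_block_size(unsigned long pages_per_block, uint64_t size,
                                 uint64_t *min_block_size, int contiguous)
{
        uint64_t block_bytes = (uint64_t)pages_per_block << PAGE_SHIFT;

        if (contiguous)
                return;
        if (size >= block_bytes && !(size & (block_bytes - 1)))
                *min_block_size = block_bytes;
}

int main(void)
{
        uint64_t min_bs = 4096;

        limit_min_block_size(512, MIB(6), &min_bs, 0);   /* 6MiB: multiple of 2MiB */
        printf("6MiB -> min_block_size = %llu\n", (unsigned long long)min_bs);

        min_bs = 4096;
        limit_min_block_size(512, MIB(3), &min_bs, 0);   /* 3MiB: not a multiple */
        printf("3MiB -> min_block_size = %llu (unchanged)\n",
               (unsigned long long)min_bs);
        return 0;
}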

Re: [PATCH v2] drm/amdgpu: Modify the contiguous flags behaviour

2024-04-17 Thread Paneer Selvam, Arunpravin

Hi Philip,

On 4/17/2024 8:58 PM, Philip Yang wrote:



On 2024-04-17 10:32, Paneer Selvam, Arunpravin wrote:

Hi Christian,

On 4/17/2024 6:57 PM, Paneer Selvam, Arunpravin wrote:

Hi Christian,

On 4/17/2024 12:19 PM, Christian König wrote:

On 17.04.24 at 08:21, Arunpravin Paneer Selvam wrote:

Now we have two flags for contiguous VRAM buffer allocation.
If the application requests AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS,
we set the ttm place TTM_PL_FLAG_CONTIGUOUS flag in the
buffer's placement function.

This patch will change the default behaviour of the two flags.

When we set AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS
- This means contiguous is not mandatory.
- we will try to allocate the contiguous buffer. If the
  allocation fails, we fall back to allocating individual pages.

When we set TTM_PL_FLAG_CONTIGUOUS
- This means contiguous allocation is mandatory.
- we are setting this in amdgpu_bo_pin_restricted() before bo validation
  and check this flag in the vram manager file.
- if this is set, we should allocate the buffer pages contiguously.
  If the allocation fails, we return -ENOSPC.

v2:
   - keep the mem_flags and bo->flags check as is (Christian)
   - place the TTM_PL_FLAG_CONTIGUOUS flag setting into the
     amdgpu_bo_pin_restricted function placement range iteration
     loop (Christian)
   - rename find_pages with amdgpu_vram_mgr_calculate_pages_per_block
     (Christian)
   - Keep the kernel BO allocation as is (Christian)
   - If BO pin vram allocation failed, we need to return -ENOSPC as
     RDMA cannot work with scattered VRAM pages (Philip)

Signed-off-by: Arunpravin Paneer Selvam 


Suggested-by: Christian König 
---
  drivers/gpu/drm/amd/amdgpu/amdgpu_object.c   |  8 ++-
  drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c | 57 
+++-

  2 files changed, 50 insertions(+), 15 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c

index 8bc79924d171..caaef7b1df49 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
@@ -153,8 +153,10 @@ void amdgpu_bo_placement_from_domain(struct 
amdgpu_bo *abo, u32 domain)

  else
  places[c].flags |= TTM_PL_FLAG_TOPDOWN;
  -    if (flags & AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS)
+    if (abo->tbo.type == ttm_bo_type_kernel &&
+    flags & AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS)
  places[c].flags |= TTM_PL_FLAG_CONTIGUOUS;
+
  c++;
  }
  @@ -966,6 +968,10 @@ int amdgpu_bo_pin_restricted(struct 
amdgpu_bo *bo, u32 domain,

  if (!bo->placements[i].lpfn ||
  (lpfn && lpfn < bo->placements[i].lpfn))
  bo->placements[i].lpfn = lpfn;
+
+    if (bo->flags & AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS &&
+    bo->placements[i].mem_type == TTM_PL_VRAM)
+    bo->placements[i].flags |= TTM_PL_FLAG_CONTIGUOUS;
  }
  r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);


Nice work, up till here that looks exactly right as far as I can see.

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c

index 8db880244324..4be8b091099a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
@@ -88,6 +88,29 @@ static inline u64 
amdgpu_vram_mgr_blocks_size(struct list_head *head)

  return size;
  }
  +static inline unsigned long
+amdgpu_vram_mgr_calculate_pages_per_block(struct 
ttm_buffer_object *tbo,

+  const struct ttm_place *place,
+  unsigned long bo_flags)
+{
+    unsigned long pages_per_block;
+
+    if (bo_flags & AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS) {
+    pages_per_block = ~0ul;


If I understand it correctly this here enforces the allocation of a 
contiguous buffer in the way that it says we should have only one 
giant page for the whole BO.
Yes, for contiguous we don't have the 2MB limit; hence we set it to the
maximum to avoid restricting min_block_size to the 2MB limitation.



+    } else {
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+    pages_per_block = HPAGE_PMD_NR;
+#else
+    /* default to 2MB */
+    pages_per_block = 2UL << (20UL - PAGE_SHIFT);
+#endif
+    pages_per_block = max_t(uint32_t, pages_per_block,
+    tbo->page_alignment);
+    }
+
+    return pages_per_block;
+}
+
  /**
   * DOC: mem_info_vram_total
   *
@@ -451,8 +474,10 @@ static int amdgpu_vram_mgr_new(struct 
ttm_resource_manager *man,

  struct amdgpu_vram_mgr *mgr = to_vram_mgr(man);
  struct amdgpu_device *adev = to_amdgpu_device(mgr);
  u64 vis_usage = 0, max_bytes, min_block_size;
+    struct amdgpu_bo *bo = ttm_to_amdgpu_bo(tbo);
  struct amdgpu_vram_mgr_resource *vres;
  u64 size, remaining_size, lpfn, fpfn;
+    unsigned long bo_flags = bo->flags;
    struct drm_buddy *mm = &mgr->mm;

Re: [PATCH v2] drm/amdgpu: Modify the contiguous flags behaviour

2024-04-17 Thread Paneer Selvam, Arunpravin

Hi Christian,

On 4/17/2024 6:57 PM, Paneer Selvam, Arunpravin wrote:

Hi Christian,

On 4/17/2024 12:19 PM, Christian König wrote:

On 17.04.24 at 08:21, Arunpravin Paneer Selvam wrote:

Now we have two flags for contiguous VRAM buffer allocation.
If the application requests AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS,
we set the ttm place TTM_PL_FLAG_CONTIGUOUS flag in the
buffer's placement function.

This patch will change the default behaviour of the two flags.

When we set AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS
- This means contiguous is not mandatory.
- we will try to allocate the contiguous buffer. If the
  allocation fails, we fall back to allocating individual pages.

When we set TTM_PL_FLAG_CONTIGUOUS
- This means contiguous allocation is mandatory.
- we are setting this in amdgpu_bo_pin_restricted() before bo validation
  and check this flag in the vram manager file.
- if this is set, we should allocate the buffer pages contiguously.
  If the allocation fails, we return -ENOSPC.

v2:
   - keep the mem_flags and bo->flags check as is (Christian)
   - place the TTM_PL_FLAG_CONTIGUOUS flag setting into the
     amdgpu_bo_pin_restricted function placement range iteration
     loop (Christian)
   - rename find_pages with amdgpu_vram_mgr_calculate_pages_per_block
     (Christian)
   - Keep the kernel BO allocation as is (Christian)
   - If BO pin vram allocation failed, we need to return -ENOSPC as
     RDMA cannot work with scattered VRAM pages (Philip)

Signed-off-by: Arunpravin Paneer Selvam 


Suggested-by: Christian König 
---
  drivers/gpu/drm/amd/amdgpu/amdgpu_object.c   |  8 ++-
  drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c | 57 
+++-

  2 files changed, 50 insertions(+), 15 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c

index 8bc79924d171..caaef7b1df49 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
@@ -153,8 +153,10 @@ void amdgpu_bo_placement_from_domain(struct 
amdgpu_bo *abo, u32 domain)

  else
  places[c].flags |= TTM_PL_FLAG_TOPDOWN;
  -    if (flags & AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS)
+    if (abo->tbo.type == ttm_bo_type_kernel &&
+    flags & AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS)
  places[c].flags |= TTM_PL_FLAG_CONTIGUOUS;
+
  c++;
  }
  @@ -966,6 +968,10 @@ int amdgpu_bo_pin_restricted(struct amdgpu_bo 
*bo, u32 domain,

  if (!bo->placements[i].lpfn ||
  (lpfn && lpfn < bo->placements[i].lpfn))
  bo->placements[i].lpfn = lpfn;
+
+    if (bo->flags & AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS &&
+    bo->placements[i].mem_type == TTM_PL_VRAM)
+    bo->placements[i].flags |= TTM_PL_FLAG_CONTIGUOUS;
  }
    r = ttm_bo_validate(>tbo, >placement, );


Nice work, up till here that looks exactly right as far as I can see.

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c

index 8db880244324..4be8b091099a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
@@ -88,6 +88,29 @@ static inline u64 
amdgpu_vram_mgr_blocks_size(struct list_head *head)

  return size;
  }
  +static inline unsigned long
+amdgpu_vram_mgr_calculate_pages_per_block(struct ttm_buffer_object 
*tbo,

+  const struct ttm_place *place,
+  unsigned long bo_flags)
+{
+    unsigned long pages_per_block;
+
+    if (bo_flags & AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS) {
+    pages_per_block = ~0ul;


If I understand it correctly this here enforces the allocation of a 
contiguous buffer in the way that it says we should have only one 
giant page for the whole BO.
Yes, for contiguous we don't have the 2MB limit; hence we set it to the
maximum to avoid restricting min_block_size to the 2MB limitation.



+    } else {
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+    pages_per_block = HPAGE_PMD_NR;
+#else
+    /* default to 2MB */
+    pages_per_block = 2UL << (20UL - PAGE_SHIFT);
+#endif
+    pages_per_block = max_t(uint32_t, pages_per_block,
+    tbo->page_alignment);
+    }
+
+    return pages_per_block;
+}
+
  /**
   * DOC: mem_info_vram_total
   *
@@ -451,8 +474,10 @@ static int amdgpu_vram_mgr_new(struct 
ttm_resource_manager *man,

  struct amdgpu_vram_mgr *mgr = to_vram_mgr(man);
  struct amdgpu_device *adev = to_amdgpu_device(mgr);
  u64 vis_usage = 0, max_bytes, min_block_size;
+    struct amdgpu_bo *bo = ttm_to_amdgpu_bo(tbo);
  struct amdgpu_vram_mgr_resource *vres;
  u64 size, remaining_size, lpfn, fpfn;
+    unsigned long bo_flags = bo->flags;
  struct drm_buddy *mm = &mgr->mm;
  struct drm_buddy_block *block;
  unsigned long pages_per_block;
@@ -468,18 +493,9 @

Re: [PATCH v2] drm/amdgpu: Modify the contiguous flags behaviour

2024-04-17 Thread Paneer Selvam, Arunpravin

Hi Christian,

On 4/17/2024 12:19 PM, Christian König wrote:

On 17.04.24 at 08:21, Arunpravin Paneer Selvam wrote:

Now we have two flags for contiguous VRAM buffer allocation.
If the application requests AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS,
we set the ttm place TTM_PL_FLAG_CONTIGUOUS flag in the
buffer's placement function.

This patch will change the default behaviour of the two flags.

When we set AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS
- This means contiguous is not mandatory.
- we will try to allocate the contiguous buffer. If the
  allocation fails, we fall back to allocating individual pages.

When we set TTM_PL_FLAG_CONTIGUOUS
- This means contiguous allocation is mandatory.
- we are setting this in amdgpu_bo_pin_restricted() before bo validation
  and check this flag in the vram manager file.
- if this is set, we should allocate the buffer pages contiguously.
  If the allocation fails, we return -ENOSPC.

v2:
   - keep the mem_flags and bo->flags check as is (Christian)
   - place the TTM_PL_FLAG_CONTIGUOUS flag setting into the
     amdgpu_bo_pin_restricted function placement range iteration
     loop (Christian)
   - rename find_pages with amdgpu_vram_mgr_calculate_pages_per_block
     (Christian)
   - Keep the kernel BO allocation as is (Christian)
   - If BO pin vram allocation failed, we need to return -ENOSPC as
     RDMA cannot work with scattered VRAM pages (Philip)

Signed-off-by: Arunpravin Paneer Selvam 


Suggested-by: Christian König 
---
  drivers/gpu/drm/amd/amdgpu/amdgpu_object.c   |  8 ++-
  drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c | 57 +++-
  2 files changed, 50 insertions(+), 15 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c

index 8bc79924d171..caaef7b1df49 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
@@ -153,8 +153,10 @@ void amdgpu_bo_placement_from_domain(struct 
amdgpu_bo *abo, u32 domain)

  else
  places[c].flags |= TTM_PL_FLAG_TOPDOWN;
  -    if (flags & AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS)
+    if (abo->tbo.type == ttm_bo_type_kernel &&
+    flags & AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS)
  places[c].flags |= TTM_PL_FLAG_CONTIGUOUS;
+
  c++;
  }
  @@ -966,6 +968,10 @@ int amdgpu_bo_pin_restricted(struct amdgpu_bo 
*bo, u32 domain,

  if (!bo->placements[i].lpfn ||
  (lpfn && lpfn < bo->placements[i].lpfn))
  bo->placements[i].lpfn = lpfn;
+
+    if (bo->flags & AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS &&
+    bo->placements[i].mem_type == TTM_PL_VRAM)
+    bo->placements[i].flags |= TTM_PL_FLAG_CONTIGUOUS;
  }
  r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);


Nice work, up till here that looks exactly right as far as I can see.

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c

index 8db880244324..4be8b091099a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
@@ -88,6 +88,29 @@ static inline u64 
amdgpu_vram_mgr_blocks_size(struct list_head *head)

  return size;
  }
  +static inline unsigned long
+amdgpu_vram_mgr_calculate_pages_per_block(struct ttm_buffer_object 
*tbo,

+  const struct ttm_place *place,
+  unsigned long bo_flags)
+{
+    unsigned long pages_per_block;
+
+    if (bo_flags & AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS) {
+    pages_per_block = ~0ul;


If I understand it correctly this here enforces the allocation of a 
contiguous buffer in the way that it says we should have only one 
giant page for the whole BO.
Yes, for contiguous we don't have the 2MB limit; hence we set it to the
maximum to avoid restricting min_block_size to the 2MB limitation.



+    } else {
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+    pages_per_block = HPAGE_PMD_NR;
+#else
+    /* default to 2MB */
+    pages_per_block = 2UL << (20UL - PAGE_SHIFT);
+#endif
+    pages_per_block = max_t(uint32_t, pages_per_block,
+    tbo->page_alignment);
+    }
+
+    return pages_per_block;
+}
+
  /**
   * DOC: mem_info_vram_total
   *
@@ -451,8 +474,10 @@ static int amdgpu_vram_mgr_new(struct 
ttm_resource_manager *man,

  struct amdgpu_vram_mgr *mgr = to_vram_mgr(man);
  struct amdgpu_device *adev = to_amdgpu_device(mgr);
  u64 vis_usage = 0, max_bytes, min_block_size;
+    struct amdgpu_bo *bo = ttm_to_amdgpu_bo(tbo);
  struct amdgpu_vram_mgr_resource *vres;
  u64 size, remaining_size, lpfn, fpfn;
+    unsigned long bo_flags = bo->flags;
  struct drm_buddy *mm = &mgr->mm;
  struct drm_buddy_block *block;
  unsigned long pages_per_block;
@@ -468,18 +493,9 @@ static int amdgpu_vram_mgr_new(struct 
ttm_resource_manager *man,

  if (tbo->type != ttm_bo_type_kernel)
  max_bytes -= AMDGPU_VM_RESERVED_VRAM;
  -    if 

Re: [PATCH] drm/amdgpu: clear seq64 memory on free

2024-04-16 Thread Paneer Selvam, Arunpravin

Hi Christian,

On 4/16/2024 6:24 PM, Christian König wrote:

On 16.04.24 at 14:34, Paneer Selvam, Arunpravin wrote:

Hi Christian,

On 4/16/2024 5:47 PM, Christian König wrote:

On 16.04.24 at 14:16, Paneer Selvam, Arunpravin wrote:

Hi Christian,

On 4/16/2024 2:35 PM, Christian König wrote:

On 15.04.24 at 20:48, Arunpravin Paneer Selvam wrote:

We should clear the memory on free. Otherwise,
there is a chance that we will access the previous
application data and this would lead to abnormal
behaviour in the current application.


Mhm, I would strongly expect that we initialize the seq number 
after allocation.


It could be that we also have situations where the correct start
value is 0x or something like that instead.


Why does this matter?
When user queue A's u64 address (fence address) is allocated to
the newly created user queue B, we see a problem.
User queue B calls the signal IOCTL, which creates a new fence
with the wptr as the seq number. In the
amdgpu_userq_fence_create function we have a check
comparing the rptr and wptr values (rptr >= wptr).
Since the rptr value is read from the u64 address which still
holds user queue A's wptr data, the rptr >= wptr condition
is satisfied and we drop the reference before the actual
command gets processed in the hardware.

If we clear this u64 value on free, we don't see this problem.


Yeah, but that doesn't belong in the seq64 handling.

Instead, the code which allocates the seq64 during userqueue creation
needs to clear it to 0.

sure, got it.


Yeah, but fixing that aside, we should probably initialize the seq64
array to something like 0xdeadbeef or a similar pattern to find issues
where we forget to initialize the allocated slots.

yes, I will prepare a patch and send for the review.

Thanks,
Arun.


Regards,
Christian.



Thanks,
Arun.


Regards,
Christian.



Thanks,
Arun.


Regards,
Christian.



Signed-off-by: Arunpravin Paneer Selvam 


---
  drivers/gpu/drm/amd/amdgpu/amdgpu_seq64.c | 6 +-
  1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_seq64.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_seq64.c

index 4b9afc4df031..9613992c9804 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_seq64.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_seq64.c
@@ -189,10 +189,14 @@ int amdgpu_seq64_alloc(struct amdgpu_device 
*adev, u64 *va, u64 **cpu_addr)

  void amdgpu_seq64_free(struct amdgpu_device *adev, u64 va)
  {
  unsigned long bit_pos;
+    u64 *cpu_addr;
    bit_pos = (va - amdgpu_seq64_get_va_base(adev)) / sizeof(u64);

-    if (bit_pos < adev->seq64.num_sem)
+    if (bit_pos < adev->seq64.num_sem) {
+    cpu_addr = bit_pos + adev->seq64.cpu_base_addr;
+    memset(cpu_addr, 0, sizeof(u64));
  __clear_bit(bit_pos, adev->seq64.used);
+    }
  }
    /**
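
A sketch of the poison-on-free idea suggested in the review, with
hypothetical helpers (the real slot management lives in
amdgpu_seq64.c and uses a bitmap; the 0xdeadbeef pattern is the
reviewer's example):

#include <stdio.h>
#include <stdint.h>

#define SEQ64_POISON 0xdeadbeefdeadbeefULL  /* reviewer's example pattern */

static uint64_t pool[4];                    /* stand-in for the seq64 page */

static void seq64_free_slot(int idx)        /* hypothetical helper */
{
        pool[idx] = SEQ64_POISON;           /* poison instead of leaving stale data */
}

int main(void)
{
        seq64_free_slot(0);
        /* A consumer that forgets to initialize its slot now fails
         * loudly instead of silently consuming a stale seq number.
         */
        if (pool[0] == SEQ64_POISON)
                printf("slot 0 used without initialization!\n");
        return 0;
}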














Re: [PATCH] drm/amdgpu: clear seq64 memory on free

2024-04-16 Thread Paneer Selvam, Arunpravin

Hi Christian,

On 4/16/2024 5:47 PM, Christian König wrote:

On 16.04.24 at 14:16, Paneer Selvam, Arunpravin wrote:

Hi Christian,

On 4/16/2024 2:35 PM, Christian König wrote:

On 15.04.24 at 20:48, Arunpravin Paneer Selvam wrote:

We should clear the memory on free. Otherwise,
there is a chance that we will access the previous
application data and this would lead to abnormal
behaviour in the current application.


Mhm, I would strongly expect that we initialize the seq number after 
allocation.


It could be that we also have situations where the correct start
value is 0x or something like that instead.


Why does this matter?
When user queue A's u64 address (fence address) is allocated to
the newly created user queue B, we see a problem.
User queue B calls the signal IOCTL, which creates a new fence
with the wptr as the seq number. In the
amdgpu_userq_fence_create function we have a check
comparing the rptr and wptr values (rptr >= wptr).
Since the rptr value is read from the u64 address which still
holds user queue A's wptr data, the rptr >= wptr condition
is satisfied and we drop the reference before the actual
command gets processed in the hardware.

If we clear this u64 value on free, we don't see this problem.


Yeah, but that doesn't belong in the seq64 handling.

Instead, the code which allocates the seq64 during userqueue creation
needs to clear it to 0.

sure, got it.

Thanks,
Arun.


Regards,
Christian.



Thanks,
Arun.


Regards,
Christian.



Signed-off-by: Arunpravin Paneer Selvam 


---
  drivers/gpu/drm/amd/amdgpu/amdgpu_seq64.c | 6 +-
  1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_seq64.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_seq64.c

index 4b9afc4df031..9613992c9804 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_seq64.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_seq64.c
@@ -189,10 +189,14 @@ int amdgpu_seq64_alloc(struct amdgpu_device 
*adev, u64 *va, u64 **cpu_addr)

  void amdgpu_seq64_free(struct amdgpu_device *adev, u64 va)
  {
  unsigned long bit_pos;
+    u64 *cpu_addr;
    bit_pos = (va - amdgpu_seq64_get_va_base(adev)) / sizeof(u64);
-    if (bit_pos < adev->seq64.num_sem)
+    if (bit_pos < adev->seq64.num_sem) {
+    cpu_addr = bit_pos + adev->seq64.cpu_base_addr;
+    memset(cpu_addr, 0, sizeof(u64));
  __clear_bit(bit_pos, adev->seq64.used);
+    }
  }
    /**










Re: [PATCH] drm/amdgpu: clear seq64 memory on free

2024-04-16 Thread Paneer Selvam, Arunpravin

Hi Christian,

On 4/16/2024 2:35 PM, Christian König wrote:

On 15.04.24 at 20:48, Arunpravin Paneer Selvam wrote:

We should clear the memory on free. Otherwise,
there is a chance that we will access the previous
application data and this would lead to abnormal
behaviour in the current application.


Mhm, I would strongly expect that we initialize the seq number after 
allocation.


It could be that we also have situations where the correct start
value is 0x or something like that instead.


Why does this matter?
When user queue A's u64 address (fence address) is allocated to the
newly created user queue B, we see a problem.
User queue B calls the signal IOCTL, which creates a new fence with
the wptr as the seq number. In the
amdgpu_userq_fence_create function we have a check
comparing the rptr and wptr values (rptr >= wptr).
Since the rptr value is read from the u64 address which still holds
user queue A's wptr data, the rptr >= wptr condition
is satisfied and we drop the reference before the actual
command gets processed in the hardware.

If we clear this u64 value on free, we don't see this problem.

Thanks,
Arun.


Regards,
Christian.



Signed-off-by: Arunpravin Paneer Selvam 


---
  drivers/gpu/drm/amd/amdgpu/amdgpu_seq64.c | 6 +-
  1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_seq64.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_seq64.c

index 4b9afc4df031..9613992c9804 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_seq64.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_seq64.c
@@ -189,10 +189,14 @@ int amdgpu_seq64_alloc(struct amdgpu_device 
*adev, u64 *va, u64 **cpu_addr)

  void amdgpu_seq64_free(struct amdgpu_device *adev, u64 va)
  {
  unsigned long bit_pos;
+    u64 *cpu_addr;
    bit_pos = (va - amdgpu_seq64_get_va_base(adev)) / sizeof(u64);
-    if (bit_pos < adev->seq64.num_sem)
+    if (bit_pos < adev->seq64.num_sem) {
+    cpu_addr = bit_pos + adev->seq64.cpu_base_addr;
+    memset(cpu_addr, 0, sizeof(u64));
  __clear_bit(bit_pos, adev->seq64.used);
+    }
  }
    /**






Re: [PATCH] drm/amdgpu: Modify the contiguous flags behaviour

2024-04-16 Thread Paneer Selvam, Arunpravin

Hi Christian,

On 4/16/2024 1:16 PM, Christian König wrote:

On 16.04.24 at 00:02, Philip Yang wrote:

On 2024-04-14 10:57, Arunpravin Paneer Selvam wrote:

Now we have two flags for contiguous VRAM buffer allocation.
If the application requests AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS,
we set the ttm place TTM_PL_FLAG_CONTIGUOUS flag in the
buffer's placement function.

This patch will change the default behaviour of the two flags.
This change will simplify the KFD best effort contiguous VRAM
allocation, because KFD doesn't need to set a new GEM_ flag.

When we set AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS
- This means contiguous is not mandatory.


AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS is used in a couple of places. For
page table BOs it is fine, as the BO size is the 4K page size. For
64KB reserved BOs and F/W size related BOs, do all allocations happen
at driver initialization, before the VRAM is fragmented?




Oh, that's a really good point, we need to keep the behavior as is for 
kernel allocations. Arun can you take care of that?

Sure, I will take care kernel allocations.
Thanks,
Arun.


Thanks,
Christian.


- we will try to allocate the contiguous buffer. If the
  allocation fails, we fall back to allocating individual pages.

When we set TTM_PL_FLAG_CONTIGUOUS
- This means contiguous allocation is mandatory.
- we are setting this in amdgpu_bo_pin_restricted() before bo validation
  and check this flag in the vram manager file.
- if this is set, we should allocate the buffer pages contiguously.
  If the allocation fails, we return -ENOSPC.

Signed-off-by: Arunpravin Paneer Selvam

Suggested-by: Christian König
---
  drivers/gpu/drm/amd/amdgpu/amdgpu_object.c   | 14 +++--
  drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c | 57 
+++-

  2 files changed, 49 insertions(+), 22 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c

index 8bc79924d171..41926d631563 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
@@ -153,8 +153,6 @@ void amdgpu_bo_placement_from_domain(struct 
amdgpu_bo *abo, u32 domain)

  else
  places[c].flags |= TTM_PL_FLAG_TOPDOWN;
  -    if (flags & AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS)
-    places[c].flags |= TTM_PL_FLAG_CONTIGUOUS;
  c++;
  }
  @@ -899,6 +897,8 @@ int amdgpu_bo_pin_restricted(struct amdgpu_bo 
*bo, u32 domain,

  {
  struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
  struct ttm_operation_ctx ctx = { false, false };
+    struct ttm_place *places = bo->placements;
+    u32 c = 0;
  int r, i;
    if (amdgpu_ttm_tt_get_usermm(bo->tbo.ttm))
@@ -921,16 +921,10 @@ int amdgpu_bo_pin_restricted(struct amdgpu_bo 
*bo, u32 domain,

    if (bo->tbo.pin_count) {
  uint32_t mem_type = bo->tbo.resource->mem_type;
-    uint32_t mem_flags = bo->tbo.resource->placement;
    if (!(domain & amdgpu_mem_type_to_domain(mem_type)))
  return -EINVAL;
  -    if ((mem_type == TTM_PL_VRAM) &&
-    (bo->flags & AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS) &&
-    !(mem_flags & TTM_PL_FLAG_CONTIGUOUS))
-    return -EINVAL;
-
This looks like a bug before, but with this patch, the check makes 
sense and is needed.

  ttm_bo_pin(&bo->tbo);
    if (max_offset != 0) {
@@ -968,6 +962,10 @@ int amdgpu_bo_pin_restricted(struct amdgpu_bo 
*bo, u32 domain,

  bo->placements[i].lpfn = lpfn;
  }
  +    if (domain & AMDGPU_GEM_DOMAIN_VRAM &&
+    !WARN_ON(places[c].mem_type != TTM_PL_VRAM))
+    places[c].flags |= TTM_PL_FLAG_CONTIGUOUS;
+


If the pinned BO is not allocated with AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS,
pin should return scattered pages, because RDMA supports scattered
dmabuf. Christian also pointed this out.


if (bo->flags & AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS &&
    bo->placements[i].mem_type == TTM_PL_VRAM)
        bo->placements[i].flags |= TTM_PL_FLAG_CONTIGUOUS;

  r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
  if (unlikely(r)) {
  dev_err(adev->dev, "%p pin failed\n", bo);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c

index 8db880244324..ddbf302878f6 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
@@ -88,6 +88,30 @@ static inline u64 
amdgpu_vram_mgr_blocks_size(struct list_head *head)

  return size;
  }
  +static inline unsigned long
+amdgpu_vram_find_pages_per_block(struct ttm_buffer_object *tbo,
+ const struct ttm_place *place,
+ unsigned long bo_flags)
+{
+    unsigned long pages_per_block;
+
+    if (bo_flags & AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS ||
+    place->flags & TTM_PL_FLAG_CONTIGUOUS) {
+    pages_per_block = ~0ul;
+    } else {
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+    pages_per_block = HPAGE_PMD_NR;
+#else
+    /* default 

Re: [PATCH] drm/amdgpu: Modify the contiguous flags behaviour

2024-04-16 Thread Paneer Selvam, Arunpravin




On 4/16/2024 3:32 AM, Philip Yang wrote:



On 2024-04-14 10:57, Arunpravin Paneer Selvam wrote:

Now we have two flags for contiguous VRAM buffer allocation.
If the application requests AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS,
we set the ttm place TTM_PL_FLAG_CONTIGUOUS flag in the
buffer's placement function.

This patch will change the default behaviour of the two flags.
This change will simplify the KFD best effort contiguous VRAM
allocation, because KFD doesn't need to set a new GEM_ flag.

When we set AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS
- This means contiguous is not mandatory.


AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS is used in a couple of places. For
page table BOs it is fine, as the BO size is the 4K page size. For
64KB reserved BOs and F/W size related BOs, do all allocations happen
at driver initialization, before the VRAM is fragmented?



- we will try to allocate the contiguous buffer. If the
  allocation fails, we fall back to allocating individual pages.

When we set TTM_PL_FLAG_CONTIGUOUS
- This means contiguous allocation is mandatory.
- we are setting this in amdgpu_bo_pin_restricted() before bo validation
  and check this flag in the vram manager file.
- if this is set, we should allocate the buffer pages contiguously.
  If the allocation fails, we return -ENOSPC.

Signed-off-by: Arunpravin Paneer Selvam
Suggested-by: Christian König
---
  drivers/gpu/drm/amd/amdgpu/amdgpu_object.c   | 14 +++--
  drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c | 57 +++-
  2 files changed, 49 insertions(+), 22 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
index 8bc79924d171..41926d631563 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
@@ -153,8 +153,6 @@ void amdgpu_bo_placement_from_domain(struct amdgpu_bo *abo, 
u32 domain)
else
places[c].flags |= TTM_PL_FLAG_TOPDOWN;
  
-		if (flags & AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS)
-			places[c].flags |= TTM_PL_FLAG_CONTIGUOUS;
c++;
}
  
@@ -899,6 +897,8 @@ int amdgpu_bo_pin_restricted(struct amdgpu_bo *bo, u32 domain,

  {
struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
struct ttm_operation_ctx ctx = { false, false };
+   struct ttm_place *places = bo->placements;
+   u32 c = 0;
int r, i;
  
  	if (amdgpu_ttm_tt_get_usermm(bo->tbo.ttm))

@@ -921,16 +921,10 @@ int amdgpu_bo_pin_restricted(struct amdgpu_bo *bo, u32 
domain,
  
  	if (bo->tbo.pin_count) {

uint32_t mem_type = bo->tbo.resource->mem_type;
-   uint32_t mem_flags = bo->tbo.resource->placement;
  
  		if (!(domain & amdgpu_mem_type_to_domain(mem_type)))

return -EINVAL;
  
-		if ((mem_type == TTM_PL_VRAM) &&
-		    (bo->flags & AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS) &&
-		    !(mem_flags & TTM_PL_FLAG_CONTIGUOUS))
-			return -EINVAL;
-
This looks like a bug before, but with this patch, the check makes 
sense and is needed.

		ttm_bo_pin(&bo->tbo);
  
  		if (max_offset != 0) {

@@ -968,6 +962,10 @@ int amdgpu_bo_pin_restricted(struct amdgpu_bo *bo, u32 
domain,
bo->placements[i].lpfn = lpfn;
}
  
+	if (domain & AMDGPU_GEM_DOMAIN_VRAM &&

+   !WARN_ON(places[c].mem_type != TTM_PL_VRAM))
+   places[c].flags |= TTM_PL_FLAG_CONTIGUOUS;
+


If the pinned BO was not allocated with AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS, 
we should pin and return scattered pages, because RDMA supports 
scattered dmabuf. Christian also pointed this out.


if (bo->flags & AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS &&
    bo->placements[i].mem_type == TTM_PL_VRAM)
        bo->placements[i].flags |= TTM_PL_FLAG_CONTIGUOUS;

	r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
if (unlikely(r)) {
dev_err(adev->dev, "%p pin failed\n", bo);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
index 8db880244324..ddbf302878f6 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
@@ -88,6 +88,30 @@ static inline u64 amdgpu_vram_mgr_blocks_size(struct 
list_head *head)
return size;
  }
  
+static inline unsigned long

+amdgpu_vram_find_pages_per_block(struct ttm_buffer_object *tbo,
+const struct ttm_place *place,
+unsigned long bo_flags)
+{
+   unsigned long pages_per_block;
+
+   if (bo_flags & AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS ||
+   place->flags & TTM_PL_FLAG_CONTIGUOUS) {
+   pages_per_block = ~0ul;
+   } else {
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+   pages_per_block = HPAGE_PMD_NR;
+#else
+   /* default to 2MB */
+   pages_per_block = 2UL << (20UL - PAGE_SHIFT);
+#endif
+  

Re: [PATCH] drm/amdgpu: Modify the contiguous flags behaviour

2024-04-15 Thread Paneer Selvam, Arunpravin

Hi Christian,

On 4/15/2024 7:33 PM, Christian König wrote:

On 14.04.24 at 16:57, Arunpravin Paneer Selvam wrote:

Now we have two flags for contiguous VRAM buffer allocation.
If the application requests AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS,
it would set the ttm place TTM_PL_FLAG_CONTIGUOUS flag in the
buffer's placement function.

This patch will change the default behaviour of the two flags.

When we set AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS
- This means contiguous is not mandatory.
- we will try to allocate the contiguous buffer. If the
   allocation fails, we fall back to allocating the individual pages.

When we set TTM_PL_FLAG_CONTIGUOUS
- This means contiguous allocation is mandatory.
- we are setting this in amdgpu_bo_pin_restricted() before bo validation
   and check this flag in the vram manager file.
- if this is set, we should allocate the buffer pages contiguously.
   If the allocation fails, we return -ENOSPC.

Signed-off-by: Arunpravin Paneer Selvam 


Suggested-by: Christian König 
---
  drivers/gpu/drm/amd/amdgpu/amdgpu_object.c   | 14 +++--
  drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c | 57 +++-
  2 files changed, 49 insertions(+), 22 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c

index 8bc79924d171..41926d631563 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
@@ -153,8 +153,6 @@ void amdgpu_bo_placement_from_domain(struct 
amdgpu_bo *abo, u32 domain)

  else
  places[c].flags |= TTM_PL_FLAG_TOPDOWN;
  -    if (flags & AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS)
-    places[c].flags |= TTM_PL_FLAG_CONTIGUOUS;
  c++;
  }
  @@ -899,6 +897,8 @@ int amdgpu_bo_pin_restricted(struct amdgpu_bo 
*bo, u32 domain,

  {
  struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
  struct ttm_operation_ctx ctx = { false, false };
+    struct ttm_place *places = bo->placements;
+    u32 c = 0;
  int r, i;
    if (amdgpu_ttm_tt_get_usermm(bo->tbo.ttm))
@@ -921,16 +921,10 @@ int amdgpu_bo_pin_restricted(struct amdgpu_bo 
*bo, u32 domain,

    if (bo->tbo.pin_count) {
  uint32_t mem_type = bo->tbo.resource->mem_type;
-    uint32_t mem_flags = bo->tbo.resource->placement;
    if (!(domain & amdgpu_mem_type_to_domain(mem_type)))
  return -EINVAL;
  -    if ((mem_type == TTM_PL_VRAM) &&
-    (bo->flags & AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS) &&
-    !(mem_flags & TTM_PL_FLAG_CONTIGUOUS))
-    return -EINVAL;
-


I think that check here needs to stay.


  ttm_bo_pin(&bo->tbo);
    if (max_offset != 0) {
@@ -968,6 +962,10 @@ int amdgpu_bo_pin_restricted(struct amdgpu_bo 
*bo, u32 domain,

  bo->placements[i].lpfn = lpfn;
  }
  +    if (domain & AMDGPU_GEM_DOMAIN_VRAM &&
+    !WARN_ON(places[c].mem_type != TTM_PL_VRAM))
+    places[c].flags |= TTM_PL_FLAG_CONTIGUOUS;
+


This needs to go into the loop directly above, as something like this:


if (bo->flags & AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS &&
    bo->placements[i].mem_type == TTM_PL_VRAM)
        bo->placements[i].flags |= TTM_PL_FLAG_CONTIGUOUS;

This essentially replaces the removed code in 
amdgpu_bo_placement_from_domain() and only applies it during pinning.
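
Spelled out, the suggestion amounts to something like this inside the
existing placement loop (a sketch of Christian's suggestion, not the posted
patch; the loop shape follows amdgpu_bo_pin_restricted()):

	for (i = 0; i < bo->placement.num_placement; i++) {
		/* ... existing lpfn clamping stays as-is ... */
		if (bo->flags & AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS &&
		    bo->placements[i].mem_type == TTM_PL_VRAM)
			bo->placements[i].flags |= TTM_PL_FLAG_CONTIGUOUS;
	}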



  r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
  if (unlikely(r)) {
  dev_err(adev->dev, "%p pin failed\n", bo);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c

index 8db880244324..ddbf302878f6 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
@@ -88,6 +88,30 @@ static inline u64 
amdgpu_vram_mgr_blocks_size(struct list_head *head)

  return size;
  }
  +static inline unsigned long
+amdgpu_vram_find_pages_per_block(struct ttm_buffer_object *tbo,
+ const struct ttm_place *place,
+ unsigned long bo_flags)


Well I think we need a better name here. "find" usually means we look 
up something in a data structure. Maybe 
amdgpu_vram_mgr_calculate_pages_per_block.



+{
+    unsigned long pages_per_block;
+
+    if (bo_flags & AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS ||
+    place->flags & TTM_PL_FLAG_CONTIGUOUS) {
+    pages_per_block = ~0ul;
+    } else {
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+    pages_per_block = HPAGE_PMD_NR;
+#else
+    /* default to 2MB */
+    pages_per_block = 2UL << (20UL - PAGE_SHIFT);
+#endif
+    pages_per_block = max_t(uint32_t, pages_per_block,
+    tbo->page_alignment);
+    }
+
+    return pages_per_block;
+}
+
  /**
   * DOC: mem_info_vram_total
   *
@@ -451,8 +475,10 @@ static int amdgpu_vram_mgr_new(struct 
ttm_resource_manager *man,

  struct amdgpu_vram_mgr *mgr = to_vram_mgr(man);
  struct amdgpu_device *adev = to_amdgpu_device(mgr);
  u64 vis_usage = 0, max_bytes, 
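
The amdgpu_vram_mgr_new() hunk is truncated here; for context, the expected
call site of the new helper would look roughly like this (an assumption, not
quoted from the patch, with bo being the amdgpu_bo backing tbo):

	/* Replaces the open-coded pages_per_block computation: */
	pages_per_block = amdgpu_vram_find_pages_per_block(tbo, place,
							   bo->flags);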

Re: [PATCH v11 1/3] drm/buddy: Implement tracking clear page feature

2024-04-15 Thread Paneer Selvam, Arunpravin

Hi Christian,
Could you please push these patches into the drm branch.

Thanks,
Arun.

On 4/15/2024 2:53 AM, Arunpravin Paneer Selvam wrote:

- Add tracking clear page feature.

- The driver should enable the DRM_BUDDY_CLEARED flag if it
   successfully clears the blocks in the free path. On the other hand,
   DRM buddy marks each block as cleared.

- Track the available cleared pages size

- If the driver requests cleared memory we prefer cleared memory
   but fall back to uncleared if we can't find the cleared blocks.
   When the driver requests uncleared memory we try to use uncleared but
   fall back to cleared memory if necessary (a usage sketch follows this list).

- When a block gets freed we clear it and mark the freed block as cleared,
   when there are buddies which are cleared as well we can merge them.
   Otherwise, we prefer to keep the blocks as separated.

- Add a function to support defragmentation.
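
A minimal usage sketch of the request side (hedged: the flag names are the
ones this series introduces; variable declarations and error handling elided):

	/* Ask for blocks that are already cleared; the allocator prefers
	 * cleared blocks and falls back to dirty ones if it must. */
	r = drm_buddy_alloc_blocks(mm, start, end, size, min_block_size,
				   &blocks, DRM_BUDDY_CLEAR_ALLOCATION);

	/* On free, tell the allocator the blocks were wiped so they can
	 * merge with cleared buddies. */
	drm_buddy_free_list(mm, &blocks, DRM_BUDDY_CLEARED);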

v1:
   - Depends on the flag check DRM_BUDDY_CLEARED, enable the block as
 cleared. Else, reset the clear flag for each block in the list(Christian)
   - For merging the 2 cleared blocks compare as below,
 drm_buddy_is_clear(block) != drm_buddy_is_clear(buddy)(Christian)
   - Defragment the memory beginning from min_order
 till the required memory space is available.

v2: (Matthew)
   - Add a wrapper drm_buddy_free_list_internal for the freeing of blocks
 operation within drm buddy.
   - Write a macro block_incompatible() to allocate the required blocks.
   - Update the xe driver for the drm_buddy_free_list change in arguments.
   - add a warning if the two blocks are incompatible on
 defragmentation
   - call full defragmentation in the fini() function
   - place a condition to test if min_order is equal to 0
   - replace the list with safe_reverse() variant as we might
 remove the block from the list.

v3:
   - fix Gitlab user reported lockup issue.
   - Keep DRM_BUDDY_HEADER_CLEAR define sorted(Matthew)
   - modify to pass the root order instead max_order in fini()
 function(Matthew)
   - change bool 1 to true(Matthew)
   - add check if min_block_size is power of 2(Matthew)
   - modify the min_block_size datatype to u64(Matthew)

v4:
   - rename the function drm_buddy_defrag with __force_merge.
   - Include __force_merge directly in drm buddy file and remove
 the defrag use in amdgpu driver.
   - Remove list_empty() check(Matthew)
   - Remove unnecessary space, headers and placement of new variables(Matthew)
   - Add a unit test case(Matthew)

v5:
   - remove force merge support to actual range allocation and not to bail
 out when contains && split(Matthew)
   - add range support to force merge function.

v6:
   - modify the alloc_range() function clear page non merged blocks
 allocation(Matthew)
   - correct the list_insert function name(Matthew).

Signed-off-by: Arunpravin Paneer Selvam 
Signed-off-by: Matthew Auld 
Suggested-by: Christian König 
Suggested-by: Matthew Auld 
Reviewed-by: Matthew Auld 
---
  drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c  |   6 +-
  drivers/gpu/drm/drm_buddy.c   | 427 ++
  drivers/gpu/drm/i915/i915_ttm_buddy_manager.c |   6 +-
  drivers/gpu/drm/tests/drm_buddy_test.c|  28 +-
  drivers/gpu/drm/xe/xe_ttm_vram_mgr.c  |   4 +-
  include/drm/drm_buddy.h   |  16 +-
  6 files changed, 363 insertions(+), 124 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
index 8db880244324..c0c851409241 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
@@ -571,7 +571,7 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager 
*man,
return 0;
  
  error_free_blocks:

-	drm_buddy_free_list(mm, &vres->blocks);
+	drm_buddy_free_list(mm, &vres->blocks, 0);
	mutex_unlock(&mgr->lock);
  error_fini:
	ttm_resource_fini(man, &vres->base);
@@ -604,7 +604,7 @@ static void amdgpu_vram_mgr_del(struct ttm_resource_manager 
*man,
  
  	amdgpu_vram_mgr_do_reserve(man);
  
-	drm_buddy_free_list(mm, &vres->blocks);

+	drm_buddy_free_list(mm, &vres->blocks, 0);
	mutex_unlock(&mgr->lock);

	atomic64_sub(vis_usage, &mgr->vis_usage);

@@ -912,7 +912,7 @@ void amdgpu_vram_mgr_fini(struct amdgpu_device *adev)
kfree(rsv);
  
	list_for_each_entry_safe(rsv, temp, &mgr->reserved_pages, blocks) {

-	drm_buddy_free_list(&mgr->mm, &rsv->allocated);
+	drm_buddy_free_list(&mgr->mm, &rsv->allocated, 0);
kfree(rsv);
}
if (!adev->gmc.is_app_apu)
diff --git a/drivers/gpu/drm/drm_buddy.c b/drivers/gpu/drm/drm_buddy.c
index 5ebdd6f8f36e..284ebae71cc4 100644
--- a/drivers/gpu/drm/drm_buddy.c
+++ b/drivers/gpu/drm/drm_buddy.c
@@ -57,6 +57,16 @@ static void list_insert_sorted(struct drm_buddy *mm,
	__list_add(&block->link, node->link.prev, &node->link);
  }
  
+static void clear_reset(struct drm_buddy_block *block)

+{
+   block->header &= ~DRM_BUDDY_HEADER_CLEAR;
+}
+
+static void 
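
The quote is truncated here; by symmetry with clear_reset(), the next helper
is presumably the setter (an assumption, consistent with a later quote of
this hunk that cuts off at "mark_cleared"):

	static void mark_cleared(struct drm_buddy_block *block)
	{
		block->header |= DRM_BUDDY_HEADER_CLEAR;
	}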

Re: [PATCH v10 1/3] drm/buddy: Implement tracking clear page feature

2024-04-14 Thread Paneer Selvam, Arunpravin

Hi Matthew,

On 4/10/2024 6:22 PM, Matthew Auld wrote:

On 08/04/2024 16:16, Arunpravin Paneer Selvam wrote:

- Add tracking clear page feature.

- The driver should enable the DRM_BUDDY_CLEARED flag if it
   successfully clears the blocks in the free path. On the other hand,
   DRM buddy marks each block as cleared.

- Track the available cleared pages size

- If the driver requests cleared memory we prefer cleared memory
   but fall back to uncleared if we can't find the cleared blocks.
   When the driver requests uncleared memory we try to use uncleared but
   fall back to cleared memory if necessary.

- When a block gets freed we clear it and mark the freed block as 
cleared,

   when there are buddies which are cleared as well we can merge them.
   Otherwise, we prefer to keep the blocks as separated.

- Add a function to support defragmentation.

v1:
   - Depends on the flag check DRM_BUDDY_CLEARED, enable the block as
 cleared. Else, reset the clear flag for each block in the 
list(Christian)

   - For merging the 2 cleared blocks compare as below,
 drm_buddy_is_clear(block) != drm_buddy_is_clear(buddy)(Christian)
   - Defragment the memory beginning from min_order
 till the required memory space is available.

v2: (Matthew)
   - Add a wrapper drm_buddy_free_list_internal for the freeing of 
blocks

 operation within drm buddy.
   - Write a macro block_incompatible() to allocate the required blocks.
   - Update the xe driver for the drm_buddy_free_list change in 
arguments.

   - add a warning if the two blocks are incompatible on
 defragmentation
   - call full defragmentation in the fini() function
   - place a condition to test if min_order is equal to 0
   - replace the list with safe_reverse() variant as we might
 remove the block from the list.

v3:
   - fix Gitlab user reported lockup issue.
   - Keep DRM_BUDDY_HEADER_CLEAR define sorted(Matthew)
   - modify to pass the root order instead max_order in fini()
 function(Matthew)
   - change bool 1 to true(Matthew)
   - add check if min_block_size is power of 2(Matthew)
   - modify the min_block_size datatype to u64(Matthew)

v4:
   - rename the function drm_buddy_defrag with __force_merge.
   - Include __force_merge directly in drm buddy file and remove
 the defrag use in amdgpu driver.
   - Remove list_empty() check(Matthew)
   - Remove unnecessary space, headers and placement of new 
variables(Matthew)

   - Add a unit test case(Matthew)

v5:
   - remove force merge support to actual range allocation and not to 
bail

 out when contains && split(Matthew)
   - add range support to force merge function.

Signed-off-by: Arunpravin Paneer Selvam 


Signed-off-by: Matthew Auld 
Suggested-by: Christian König 
Suggested-by: Matthew Auld 
---
  drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c  |   6 +-
  drivers/gpu/drm/drm_buddy.c   | 430 ++
  drivers/gpu/drm/i915/i915_ttm_buddy_manager.c |   6 +-
  drivers/gpu/drm/tests/drm_buddy_test.c    |  28 +-
  drivers/gpu/drm/xe/xe_ttm_vram_mgr.c  |   4 +-
  include/drm/drm_buddy.h   |  16 +-
  6 files changed, 368 insertions(+), 122 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c

index 8db880244324..c0c851409241 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
@@ -571,7 +571,7 @@ static int amdgpu_vram_mgr_new(struct 
ttm_resource_manager *man,

  return 0;
    error_free_blocks:
-    drm_buddy_free_list(mm, &vres->blocks);
+    drm_buddy_free_list(mm, &vres->blocks, 0);
  mutex_unlock(&mgr->lock);
  error_fini:
  ttm_resource_fini(man, &vres->base);
@@ -604,7 +604,7 @@ static void amdgpu_vram_mgr_del(struct 
ttm_resource_manager *man,

    amdgpu_vram_mgr_do_reserve(man);
  -    drm_buddy_free_list(mm, &vres->blocks);
+    drm_buddy_free_list(mm, &vres->blocks, 0);
  mutex_unlock(&mgr->lock);
    atomic64_sub(vis_usage, &mgr->vis_usage);
@@ -912,7 +912,7 @@ void amdgpu_vram_mgr_fini(struct amdgpu_device 
*adev)

  kfree(rsv);
    list_for_each_entry_safe(rsv, temp, &mgr->reserved_pages, 
blocks) {

-    drm_buddy_free_list(&mgr->mm, &rsv->allocated);
+    drm_buddy_free_list(&mgr->mm, &rsv->allocated, 0);
  kfree(rsv);
  }
  if (!adev->gmc.is_app_apu)
diff --git a/drivers/gpu/drm/drm_buddy.c b/drivers/gpu/drm/drm_buddy.c
index 5ebdd6f8f36e..83dbe252f727 100644
--- a/drivers/gpu/drm/drm_buddy.c
+++ b/drivers/gpu/drm/drm_buddy.c
@@ -38,8 +38,8 @@ static void drm_block_free(struct drm_buddy *mm,
  kmem_cache_free(slab_blocks, block);
  }
  -static void list_insert_sorted(struct drm_buddy *mm,
-   struct drm_buddy_block *block)
+static void list_insert(struct drm_buddy *mm,
+    struct drm_buddy_block *block)


Why the change here?


  {
  struct drm_buddy_block *node;
  struct list_head *head;
@@ -57,6 +57,16 @@ static void list_insert_sorted(struct drm_buddy *mm,
  

Re: [PATCH v10 1/3] drm/buddy: Implement tracking clear page feature

2024-04-08 Thread Paneer Selvam, Arunpravin

Hi Matthew,
Could you please review these changes, as a few clients are waiting for 
these patches.


Thanks,
Arun.
On 4/8/2024 8:46 PM, Arunpravin Paneer Selvam wrote:

- Add tracking clear page feature.

- The driver should enable the DRM_BUDDY_CLEARED flag if it
   successfully clears the blocks in the free path. On the other hand,
   DRM buddy marks each block as cleared.

- Track the available cleared pages size

- If the driver requests cleared memory we prefer cleared memory
   but fall back to uncleared if we can't find the cleared blocks.
   When the driver requests uncleared memory we try to use uncleared but
   fall back to cleared memory if necessary.

- When a block gets freed we clear it and mark the freed block as cleared,
   when there are buddies which are cleared as well we can merge them.
   Otherwise, we prefer to keep the blocks as separated.

- Add a function to support defragmentation.

v1:
   - Depends on the flag check DRM_BUDDY_CLEARED, enable the block as
 cleared. Else, reset the clear flag for each block in the list(Christian)
   - For merging the 2 cleared blocks compare as below,
 drm_buddy_is_clear(block) != drm_buddy_is_clear(buddy)(Christian)
   - Defragment the memory beginning from min_order
 till the required memory space is available.

v2: (Matthew)
   - Add a wrapper drm_buddy_free_list_internal for the freeing of blocks
 operation within drm buddy.
   - Write a macro block_incompatible() to allocate the required blocks.
   - Update the xe driver for the drm_buddy_free_list change in arguments.
   - add a warning if the two blocks are incompatible on
 defragmentation
   - call full defragmentation in the fini() function
   - place a condition to test if min_order is equal to 0
   - replace the list with safe_reverse() variant as we might
 remove the block from the list.

v3:
   - fix Gitlab user reported lockup issue.
   - Keep DRM_BUDDY_HEADER_CLEAR define sorted(Matthew)
   - modify to pass the root order instead max_order in fini()
 function(Matthew)
   - change bool 1 to true(Matthew)
   - add check if min_block_size is power of 2(Matthew)
   - modify the min_block_size datatype to u64(Matthew)

v4:
   - rename the function drm_buddy_defrag with __force_merge.
   - Include __force_merge directly in drm buddy file and remove
 the defrag use in amdgpu driver.
   - Remove list_empty() check(Matthew)
   - Remove unnecessary space, headers and placement of new variables(Matthew)
   - Add a unit test case(Matthew)

v5:
   - remove force merge support to actual range allocation and not to bail
 out when contains && split(Matthew)
   - add range support to force merge function.

Signed-off-by: Arunpravin Paneer Selvam 
Signed-off-by: Matthew Auld 
Suggested-by: Christian König 
Suggested-by: Matthew Auld 
---
  drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c  |   6 +-
  drivers/gpu/drm/drm_buddy.c   | 430 ++
  drivers/gpu/drm/i915/i915_ttm_buddy_manager.c |   6 +-
  drivers/gpu/drm/tests/drm_buddy_test.c|  28 +-
  drivers/gpu/drm/xe/xe_ttm_vram_mgr.c  |   4 +-
  include/drm/drm_buddy.h   |  16 +-
  6 files changed, 368 insertions(+), 122 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
index 8db880244324..c0c851409241 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
@@ -571,7 +571,7 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager 
*man,
return 0;
  
  error_free_blocks:

-	drm_buddy_free_list(mm, &vres->blocks);
+	drm_buddy_free_list(mm, &vres->blocks, 0);
	mutex_unlock(&mgr->lock);
  error_fini:
	ttm_resource_fini(man, &vres->base);
@@ -604,7 +604,7 @@ static void amdgpu_vram_mgr_del(struct ttm_resource_manager 
*man,
  
  	amdgpu_vram_mgr_do_reserve(man);
  
-	drm_buddy_free_list(mm, &vres->blocks);

+	drm_buddy_free_list(mm, &vres->blocks, 0);
	mutex_unlock(&mgr->lock);

  	atomic64_sub(vis_usage, &mgr->vis_usage);

@@ -912,7 +912,7 @@ void amdgpu_vram_mgr_fini(struct amdgpu_device *adev)
kfree(rsv);
  
  	list_for_each_entry_safe(rsv, temp, &mgr->reserved_pages, blocks) {

-	drm_buddy_free_list(&mgr->mm, &rsv->allocated);
+	drm_buddy_free_list(&mgr->mm, &rsv->allocated, 0);
kfree(rsv);
}
if (!adev->gmc.is_app_apu)
diff --git a/drivers/gpu/drm/drm_buddy.c b/drivers/gpu/drm/drm_buddy.c
index 5ebdd6f8f36e..83dbe252f727 100644
--- a/drivers/gpu/drm/drm_buddy.c
+++ b/drivers/gpu/drm/drm_buddy.c
@@ -38,8 +38,8 @@ static void drm_block_free(struct drm_buddy *mm,
kmem_cache_free(slab_blocks, block);
  }
  
-static void list_insert_sorted(struct drm_buddy *mm,

-  struct drm_buddy_block *block)
+static void list_insert(struct drm_buddy *mm,
+   struct drm_buddy_block *block)
  {
struct drm_buddy_block *node;
struct list_head 

Re: [PATCH v9 1/3] drm/buddy: Implement tracking clear page feature

2024-04-01 Thread Paneer Selvam, Arunpravin

Hi Matthew,

On 3/28/2024 10:18 PM, Matthew Auld wrote:

On 28/03/2024 16:07, Paneer Selvam, Arunpravin wrote:

Hi Matthew,

On 3/26/2024 11:39 PM, Matthew Auld wrote:

On 18/03/2024 21:40, Arunpravin Paneer Selvam wrote:

- Add tracking clear page feature.

- The driver should enable the DRM_BUDDY_CLEARED flag if it
   successfully clears the blocks in the free path. On the other hand,
   DRM buddy marks each block as cleared.

- Track the available cleared pages size

- If the driver requests cleared memory we prefer cleared memory
   but fall back to uncleared if we can't find the cleared blocks.
   When the driver requests uncleared memory we try to use uncleared but
   fall back to cleared memory if necessary.

- When a block gets freed we clear it and mark the freed block as 
cleared,

   when there are buddies which are cleared as well we can merge them.
   Otherwise, we prefer to keep the blocks as separated.

- Add a function to support defragmentation.

v1:
   - Depends on the flag check DRM_BUDDY_CLEARED, enable the block as
 cleared. Else, reset the clear flag for each block in the 
list(Christian)

   - For merging the 2 cleared blocks compare as below,
 drm_buddy_is_clear(block) != drm_buddy_is_clear(buddy)(Christian)
   - Defragment the memory beginning from min_order
 till the required memory space is available.

v2: (Matthew)
   - Add a wrapper drm_buddy_free_list_internal for the freeing of 
blocks

 operation within drm buddy.
   - Write a macro block_incompatible() to allocate the required 
blocks.
   - Update the xe driver for the drm_buddy_free_list change in 
arguments.

   - add a warning if the two blocks are incompatible on
 defragmentation
   - call full defragmentation in the fini() function
   - place a condition to test if min_order is equal to 0
   - replace the list with safe_reverse() variant as we might
 remove the block from the list.

v3:
   - fix Gitlab user reported lockup issue.
   - Keep DRM_BUDDY_HEADER_CLEAR define sorted(Matthew)
   - modify to pass the root order instead max_order in fini()
 function(Matthew)
   - change bool 1 to true(Matthew)
   - add check if min_block_size is power of 2(Matthew)
   - modify the min_block_size datatype to u64(Matthew)

v4:
   - rename the function drm_buddy_defrag with __force_merge.
   - Include __force_merge directly in drm buddy file and remove
 the defrag use in amdgpu driver.
   - Remove list_empty() check(Matthew)
   - Remove unnecessary space, headers and placement of new 
variables(Matthew)

   - Add a unit test case(Matthew)

Signed-off-by: Arunpravin Paneer Selvam 


Signed-off-by: Matthew Auld 
Suggested-by: Christian König 
Suggested-by: Matthew Auld 
---
  drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c  |   6 +-
  drivers/gpu/drm/drm_buddy.c   | 427 
++

  drivers/gpu/drm/i915/i915_ttm_buddy_manager.c |   6 +-
  drivers/gpu/drm/tests/drm_buddy_test.c    |  18 +-
  drivers/gpu/drm/xe/xe_ttm_vram_mgr.c  |   4 +-
  include/drm/drm_buddy.h   |  16 +-
  6 files changed, 360 insertions(+), 117 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c

index 8db880244324..c0c851409241 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
@@ -571,7 +571,7 @@ static int amdgpu_vram_mgr_new(struct 
ttm_resource_manager *man,

  return 0;
    error_free_blocks:
-    drm_buddy_free_list(mm, &vres->blocks);
+    drm_buddy_free_list(mm, &vres->blocks, 0);
  mutex_unlock(&mgr->lock);
  error_fini:
  ttm_resource_fini(man, &vres->base);
@@ -604,7 +604,7 @@ static void amdgpu_vram_mgr_del(struct 
ttm_resource_manager *man,

    amdgpu_vram_mgr_do_reserve(man);
  -    drm_buddy_free_list(mm, &vres->blocks);
+    drm_buddy_free_list(mm, &vres->blocks, 0);
  mutex_unlock(&mgr->lock);
    atomic64_sub(vis_usage, &mgr->vis_usage);
@@ -912,7 +912,7 @@ void amdgpu_vram_mgr_fini(struct amdgpu_device 
*adev)

  kfree(rsv);
    list_for_each_entry_safe(rsv, temp, &mgr->reserved_pages, 
blocks) {

-    drm_buddy_free_list(&mgr->mm, &rsv->allocated);
+    drm_buddy_free_list(&mgr->mm, &rsv->allocated, 0);
  kfree(rsv);
  }
  if (!adev->gmc.is_app_apu)
diff --git a/drivers/gpu/drm/drm_buddy.c b/drivers/gpu/drm/drm_buddy.c
index c4222b886db7..625a30a6b855 100644
--- a/drivers/gpu/drm/drm_buddy.c
+++ b/drivers/gpu/drm/drm_buddy.c
@@ -38,8 +38,8 @@ static void drm_block_free(struct drm_buddy *mm,
  kmem_cache_free(slab_blocks, block);
  }
  -static void list_insert_sorted(struct drm_buddy *mm,
-   struct drm_buddy_block *block)
+static void list_insert(struct drm_buddy *mm,
+    struct drm_buddy_block *block)
  {
  struct drm_buddy_block *node;
  struct list_head *head;
@@ -57,6 +57,16 @@ static void list_insert_sorted(struct drm_buddy 
*mm,

  __list_add(&block->link, node->l

Re: [PATCH v9 1/3] drm/buddy: Implement tracking clear page feature

2024-03-28 Thread Paneer Selvam, Arunpravin

Hi Matthew,

On 3/26/2024 11:39 PM, Matthew Auld wrote:

On 18/03/2024 21:40, Arunpravin Paneer Selvam wrote:

- Add tracking clear page feature.

- The driver should enable the DRM_BUDDY_CLEARED flag if it
   successfully clears the blocks in the free path. On the other hand,
   DRM buddy marks each block as cleared.

- Track the available cleared pages size

- If the driver requests cleared memory we prefer cleared memory
   but fall back to uncleared if we can't find the cleared blocks.
   When the driver requests uncleared memory we try to use uncleared but
   fall back to cleared memory if necessary.

- When a block gets freed we clear it and mark the freed block as 
cleared,

   when there are buddies which are cleared as well we can merge them.
   Otherwise, we prefer to keep the blocks as separated.

- Add a function to support defragmentation.

v1:
   - Depends on the flag check DRM_BUDDY_CLEARED, enable the block as
 cleared. Else, reset the clear flag for each block in the 
list(Christian)

   - For merging the 2 cleared blocks compare as below,
 drm_buddy_is_clear(block) != drm_buddy_is_clear(buddy)(Christian)
   - Defragment the memory beginning from min_order
 till the required memory space is available.

v2: (Matthew)
   - Add a wrapper drm_buddy_free_list_internal for the freeing of 
blocks

 operation within drm buddy.
   - Write a macro block_incompatible() to allocate the required blocks.
   - Update the xe driver for the drm_buddy_free_list change in 
arguments.

   - add a warning if the two blocks are incompatible on
 defragmentation
   - call full defragmentation in the fini() function
   - place a condition to test if min_order is equal to 0
   - replace the list with safe_reverse() variant as we might
 remove the block from the list.

v3:
   - fix Gitlab user reported lockup issue.
   - Keep DRM_BUDDY_HEADER_CLEAR define sorted(Matthew)
   - modify to pass the root order instead max_order in fini()
 function(Matthew)
   - change bool 1 to true(Matthew)
   - add check if min_block_size is power of 2(Matthew)
   - modify the min_block_size datatype to u64(Matthew)

v4:
   - rename the function drm_buddy_defrag with __force_merge.
   - Include __force_merge directly in drm buddy file and remove
 the defrag use in amdgpu driver.
   - Remove list_empty() check(Matthew)
   - Remove unnecessary space, headers and placement of new 
variables(Matthew)

   - Add a unit test case(Matthew)

Signed-off-by: Arunpravin Paneer Selvam 


Signed-off-by: Matthew Auld 
Suggested-by: Christian König 
Suggested-by: Matthew Auld 
---
  drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c  |   6 +-
  drivers/gpu/drm/drm_buddy.c   | 427 ++
  drivers/gpu/drm/i915/i915_ttm_buddy_manager.c |   6 +-
  drivers/gpu/drm/tests/drm_buddy_test.c    |  18 +-
  drivers/gpu/drm/xe/xe_ttm_vram_mgr.c  |   4 +-
  include/drm/drm_buddy.h   |  16 +-
  6 files changed, 360 insertions(+), 117 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c

index 8db880244324..c0c851409241 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
@@ -571,7 +571,7 @@ static int amdgpu_vram_mgr_new(struct 
ttm_resource_manager *man,

  return 0;
    error_free_blocks:
-    drm_buddy_free_list(mm, &vres->blocks);
+    drm_buddy_free_list(mm, &vres->blocks, 0);
  mutex_unlock(&mgr->lock);
  error_fini:
  ttm_resource_fini(man, &vres->base);
@@ -604,7 +604,7 @@ static void amdgpu_vram_mgr_del(struct 
ttm_resource_manager *man,

    amdgpu_vram_mgr_do_reserve(man);
  -    drm_buddy_free_list(mm, &vres->blocks);
+    drm_buddy_free_list(mm, &vres->blocks, 0);
  mutex_unlock(&mgr->lock);
    atomic64_sub(vis_usage, &mgr->vis_usage);
@@ -912,7 +912,7 @@ void amdgpu_vram_mgr_fini(struct amdgpu_device 
*adev)

  kfree(rsv);
    list_for_each_entry_safe(rsv, temp, &mgr->reserved_pages, 
blocks) {

-    drm_buddy_free_list(&mgr->mm, &rsv->allocated);
+    drm_buddy_free_list(&mgr->mm, &rsv->allocated, 0);
  kfree(rsv);
  }
  if (!adev->gmc.is_app_apu)
diff --git a/drivers/gpu/drm/drm_buddy.c b/drivers/gpu/drm/drm_buddy.c
index c4222b886db7..625a30a6b855 100644
--- a/drivers/gpu/drm/drm_buddy.c
+++ b/drivers/gpu/drm/drm_buddy.c
@@ -38,8 +38,8 @@ static void drm_block_free(struct drm_buddy *mm,
  kmem_cache_free(slab_blocks, block);
  }
  -static void list_insert_sorted(struct drm_buddy *mm,
-   struct drm_buddy_block *block)
+static void list_insert(struct drm_buddy *mm,
+    struct drm_buddy_block *block)
  {
  struct drm_buddy_block *node;
  struct list_head *head;
@@ -57,6 +57,16 @@ static void list_insert_sorted(struct drm_buddy *mm,
  __list_add(&block->link, node->link.prev, &node->link);
  }
  +static void clear_reset(struct drm_buddy_block *block)
+{
+    block->header &= ~DRM_BUDDY_HEADER_CLEAR;
+}
+
+static void mark_cleared(struct 

Re: [PATCH v9 2/3] drm/amdgpu: Enable clear page functionality

2024-03-27 Thread Paneer Selvam, Arunpravin

Hi Alex,

On 3/26/2024 8:23 PM, Alex Deucher wrote:

On Tue, Mar 26, 2024 at 10:01 AM Alex Deucher  wrote:

On Tue, Mar 26, 2024 at 9:59 AM Paneer Selvam, Arunpravin
 wrote:

Hi Alex,

On 3/26/2024 7:08 PM, Alex Deucher wrote:

On Mon, Mar 18, 2024 at 5:47 PM Arunpravin Paneer Selvam
 wrote:

Add clear page support in vram memory region.

v1(Christian):
- Don't handle clear page as a TTM flag, since when moving the BO back
  in from GTT again we don't need that.
- Make a specialized version of amdgpu_fill_buffer() which only
  clears the VRAM areas which are not already cleared
- Drop the TTM_PL_FLAG_WIPE_ON_RELEASE check in
  amdgpu_object.c

v2:
- Modify the function name amdgpu_ttm_* (Alex)
- Drop the delayed parameter (Christian)
- handle amdgpu_res_cleared() just above the size
  calculation (Christian)
- Use AMDGPU_GEM_CREATE_VRAM_WIPE_ON_RELEASE for clearing the buffers
  in the free path to properly wait for fences etc.. (Christian)

v3(Christian):
- Remove buffer clear code in the VRAM manager; instead, change the
  AMDGPU_GEM_CREATE_VRAM_WIPE_ON_RELEASE handling to set
  the DRM_BUDDY_CLEARED flag.
- Remove ! from amdgpu_res_cleared() check.

Signed-off-by: Arunpravin Paneer Selvam 
Suggested-by: Christian König 
Acked-by: Felix Kuehling 
---
   drivers/gpu/drm/amd/amdgpu/amdgpu_object.c| 22 ---
   .../gpu/drm/amd/amdgpu/amdgpu_res_cursor.h| 25 
   drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c   | 61 ++-
   drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h   |  5 +-
   drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c  |  6 +-
   drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.h  |  5 ++
   6 files changed, 111 insertions(+), 13 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
index 8bc79924d171..c92d92b28a57 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
@@ -39,6 +39,7 @@
   #include "amdgpu.h"
   #include "amdgpu_trace.h"
   #include "amdgpu_amdkfd.h"
+#include "amdgpu_vram_mgr.h"

   /**
* DOC: amdgpu_object
@@ -601,8 +602,7 @@ int amdgpu_bo_create(struct amdgpu_device *adev,
  if (!amdgpu_bo_support_uswc(bo->flags))
  bo->flags &= ~AMDGPU_GEM_CREATE_CPU_GTT_USWC;

-   if (adev->ras_enabled)
-   bo->flags |= AMDGPU_GEM_CREATE_VRAM_WIPE_ON_RELEASE;
+   bo->flags |= AMDGPU_GEM_CREATE_VRAM_WIPE_ON_RELEASE;

  bo->tbo.bdev = >mman.bdev;
  if (bp->domain & (AMDGPU_GEM_DOMAIN_GWS | AMDGPU_GEM_DOMAIN_OA |
@@ -632,15 +632,17 @@ int amdgpu_bo_create(struct amdgpu_device *adev,

  if (bp->flags & AMDGPU_GEM_CREATE_VRAM_CLEARED &&
  bo->tbo.resource->mem_type == TTM_PL_VRAM) {
-   struct dma_fence *fence;
+   struct dma_fence *fence = NULL;

-	r = amdgpu_fill_buffer(bo, 0, bo->tbo.base.resv, &fence, true);
+	r = amdgpu_ttm_clear_buffer(bo, bo->tbo.base.resv, &fence);
  if (unlikely(r))
  goto fail_unreserve;

-   dma_resv_add_fence(bo->tbo.base.resv, fence,
-  DMA_RESV_USAGE_KERNEL);
-   dma_fence_put(fence);
+   if (fence) {
+   dma_resv_add_fence(bo->tbo.base.resv, fence,
+  DMA_RESV_USAGE_KERNEL);
+   dma_fence_put(fence);
+   }
  }
  if (!bp->resv)
  amdgpu_bo_unreserve(bo);
@@ -1365,8 +1367,12 @@ void amdgpu_bo_release_notify(struct ttm_buffer_object 
*bo)
  if (WARN_ON_ONCE(!dma_resv_trylock(bo->base.resv)))
  return;

-	r = amdgpu_fill_buffer(abo, AMDGPU_POISON, bo->base.resv, &fence, true);
+	r = amdgpu_fill_buffer(abo, 0, bo->base.resv, &fence, true);
  if (!WARN_ON(r)) {
+   struct amdgpu_vram_mgr_resource *vres;
+
+   vres = to_amdgpu_vram_mgr_resource(bo->resource);
+   vres->flags |= DRM_BUDDY_CLEARED;
  amdgpu_bo_fence(abo, fence, false);
  dma_fence_put(fence);
  }
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_res_cursor.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_res_cursor.h
index 381101d2bf05..50fcd86e1033 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_res_cursor.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_res_cursor.h
@@ -164,4 +164,29 @@ static inline void amdgpu_res_next(struct 
amdgpu_res_cursor *cur, uint64_t size)
  }
   }

+/**
+ * amdgpu_res_cleared - check if blocks are cleared
+ *
+ * @cur: the cursor to extract the block
+ *
+ * Check if the @cur block is cleared
+ */
+static inline bool amdgpu_res_cleared(struct amdgpu_res_cursor *cur)
+{
+   struct drm_buddy_block *block;
+
+   switch (cur-

Re: [PATCH v9 2/3] drm/amdgpu: Enable clear page functionality

2024-03-26 Thread Paneer Selvam, Arunpravin

Hi Alex,

On 3/26/2024 7:08 PM, Alex Deucher wrote:

On Mon, Mar 18, 2024 at 5:47 PM Arunpravin Paneer Selvam
 wrote:

Add clear page support in vram memory region.

v1(Christian):
   - Don't handle clear page as a TTM flag, since when moving the BO back
 in from GTT again we don't need that.
   - Make a specialized version of amdgpu_fill_buffer() which only
 clears the VRAM areas which are not already cleared
   - Drop the TTM_PL_FLAG_WIPE_ON_RELEASE check in
 amdgpu_object.c

v2:
   - Modify the function name amdgpu_ttm_* (Alex)
   - Drop the delayed parameter (Christian)
   - handle amdgpu_res_cleared() just above the size
 calculation (Christian)
   - Use AMDGPU_GEM_CREATE_VRAM_WIPE_ON_RELEASE for clearing the buffers
 in the free path to properly wait for fences etc.. (Christian)

v3(Christian):
   - Remove buffer clear code in the VRAM manager; instead, change the
 AMDGPU_GEM_CREATE_VRAM_WIPE_ON_RELEASE handling to set
 the DRM_BUDDY_CLEARED flag.
   - Remove ! from amdgpu_res_cleared() check.

Signed-off-by: Arunpravin Paneer Selvam 
Suggested-by: Christian König 
Acked-by: Felix Kuehling 
---
  drivers/gpu/drm/amd/amdgpu/amdgpu_object.c| 22 ---
  .../gpu/drm/amd/amdgpu/amdgpu_res_cursor.h| 25 
  drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c   | 61 ++-
  drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h   |  5 +-
  drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c  |  6 +-
  drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.h  |  5 ++
  6 files changed, 111 insertions(+), 13 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
index 8bc79924d171..c92d92b28a57 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
@@ -39,6 +39,7 @@
  #include "amdgpu.h"
  #include "amdgpu_trace.h"
  #include "amdgpu_amdkfd.h"
+#include "amdgpu_vram_mgr.h"

  /**
   * DOC: amdgpu_object
@@ -601,8 +602,7 @@ int amdgpu_bo_create(struct amdgpu_device *adev,
 if (!amdgpu_bo_support_uswc(bo->flags))
 bo->flags &= ~AMDGPU_GEM_CREATE_CPU_GTT_USWC;

-   if (adev->ras_enabled)
-   bo->flags |= AMDGPU_GEM_CREATE_VRAM_WIPE_ON_RELEASE;
+   bo->flags |= AMDGPU_GEM_CREATE_VRAM_WIPE_ON_RELEASE;

 bo->tbo.bdev = >mman.bdev;
 if (bp->domain & (AMDGPU_GEM_DOMAIN_GWS | AMDGPU_GEM_DOMAIN_OA |
@@ -632,15 +632,17 @@ int amdgpu_bo_create(struct amdgpu_device *adev,

 if (bp->flags & AMDGPU_GEM_CREATE_VRAM_CLEARED &&
 bo->tbo.resource->mem_type == TTM_PL_VRAM) {
-   struct dma_fence *fence;
+   struct dma_fence *fence = NULL;

-	r = amdgpu_fill_buffer(bo, 0, bo->tbo.base.resv, &fence, true);
+	r = amdgpu_ttm_clear_buffer(bo, bo->tbo.base.resv, &fence);
 if (unlikely(r))
 goto fail_unreserve;

-   dma_resv_add_fence(bo->tbo.base.resv, fence,
-  DMA_RESV_USAGE_KERNEL);
-   dma_fence_put(fence);
+   if (fence) {
+   dma_resv_add_fence(bo->tbo.base.resv, fence,
+  DMA_RESV_USAGE_KERNEL);
+   dma_fence_put(fence);
+   }
 }
 if (!bp->resv)
 amdgpu_bo_unreserve(bo);
@@ -1365,8 +1367,12 @@ void amdgpu_bo_release_notify(struct ttm_buffer_object 
*bo)
 if (WARN_ON_ONCE(!dma_resv_trylock(bo->base.resv)))
 return;

-	r = amdgpu_fill_buffer(abo, AMDGPU_POISON, bo->base.resv, &fence, true);
+	r = amdgpu_fill_buffer(abo, 0, bo->base.resv, &fence, true);
 if (!WARN_ON(r)) {
+   struct amdgpu_vram_mgr_resource *vres;
+
+   vres = to_amdgpu_vram_mgr_resource(bo->resource);
+   vres->flags |= DRM_BUDDY_CLEARED;
 amdgpu_bo_fence(abo, fence, false);
 dma_fence_put(fence);
 }
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_res_cursor.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_res_cursor.h
index 381101d2bf05..50fcd86e1033 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_res_cursor.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_res_cursor.h
@@ -164,4 +164,29 @@ static inline void amdgpu_res_next(struct 
amdgpu_res_cursor *cur, uint64_t size)
 }
  }

+/**
+ * amdgpu_res_cleared - check if blocks are cleared
+ *
+ * @cur: the cursor to extract the block
+ *
+ * Check if the @cur block is cleared
+ */
+static inline bool amdgpu_res_cleared(struct amdgpu_res_cursor *cur)
+{
+   struct drm_buddy_block *block;
+
+   switch (cur->mem_type) {
+   case TTM_PL_VRAM:
+   block = cur->node;
+
+   if (!amdgpu_vram_mgr_is_cleared(block))
+   return false;
+   break;
+   default:
+   return false;
+   }
+
+   return true;
+}
+
  #endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c 
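
The amdgpu_ttm.c hunk is cut off here. As a hedged illustration, the clear
path described in the changelog would consume amdgpu_res_cleared() roughly as
below; the cursor helpers are the ones quoted above, while the loop body is
illustrative rather than the posted code:

	struct amdgpu_res_cursor cursor;

	amdgpu_res_first(bo->tbo.resource, 0, amdgpu_bo_size(bo), &cursor);
	while (cursor.remaining) {
		/* Only fill VRAM blocks that are not already cleared. */
		if (!amdgpu_res_cleared(&cursor)) {
			/* ... submit a GPU fill for cursor.start/cursor.size ... */
		}
		amdgpu_res_next(&cursor, cursor.size);
	}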

Re: [PATCH v9 1/3] drm/buddy: Implement tracking clear page feature

2024-03-25 Thread Paneer Selvam, Arunpravin

Hi Matthew,
ping?

Thanks,
Arun.
On 3/19/2024 3:10 AM, Arunpravin Paneer Selvam wrote:

- Add tracking clear page feature.

- The driver should enable the DRM_BUDDY_CLEARED flag if it
   successfully clears the blocks in the free path. On the other hand,
   DRM buddy marks each block as cleared.

- Track the available cleared pages size

- If the driver requests cleared memory we prefer cleared memory
   but fall back to uncleared if we can't find the cleared blocks.
   When the driver requests uncleared memory we try to use uncleared but
   fall back to cleared memory if necessary.

- When a block gets freed we clear it and mark the freed block as cleared,
   when there are buddies which are cleared as well we can merge them.
   Otherwise, we prefer to keep the blocks as separated.

- Add a function to support defragmentation.

v1:
   - Depends on the flag check DRM_BUDDY_CLEARED, enable the block as
 cleared. Else, reset the clear flag for each block in the list(Christian)
   - For merging the 2 cleared blocks compare as below,
 drm_buddy_is_clear(block) != drm_buddy_is_clear(buddy)(Christian)
   - Defragment the memory beginning from min_order
 till the required memory space is available.

v2: (Matthew)
   - Add a wrapper drm_buddy_free_list_internal for the freeing of blocks
 operation within drm buddy.
   - Write a macro block_incompatible() to allocate the required blocks.
   - Update the xe driver for the drm_buddy_free_list change in arguments.
   - add a warning if the two blocks are incompatible on
 defragmentation
   - call full defragmentation in the fini() function
   - place a condition to test if min_order is equal to 0
   - replace the list with safe_reverse() variant as we might
 remove the block from the list.

v3:
   - fix Gitlab user reported lockup issue.
   - Keep DRM_BUDDY_HEADER_CLEAR define sorted(Matthew)
   - modify to pass the root order instead max_order in fini()
 function(Matthew)
   - change bool 1 to true(Matthew)
   - add check if min_block_size is power of 2(Matthew)
   - modify the min_block_size datatype to u64(Matthew)

v4:
   - rename the function drm_buddy_defrag with __force_merge.
   - Include __force_merge directly in drm buddy file and remove
 the defrag use in amdgpu driver.
   - Remove list_empty() check(Matthew)
   - Remove unnecessary space, headers and placement of new variables(Matthew)
   - Add a unit test case(Matthew)

Signed-off-by: Arunpravin Paneer Selvam 
Signed-off-by: Matthew Auld 
Suggested-by: Christian König 
Suggested-by: Matthew Auld 
---
  drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c  |   6 +-
  drivers/gpu/drm/drm_buddy.c   | 427 ++
  drivers/gpu/drm/i915/i915_ttm_buddy_manager.c |   6 +-
  drivers/gpu/drm/tests/drm_buddy_test.c|  18 +-
  drivers/gpu/drm/xe/xe_ttm_vram_mgr.c  |   4 +-
  include/drm/drm_buddy.h   |  16 +-
  6 files changed, 360 insertions(+), 117 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
index 8db880244324..c0c851409241 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
@@ -571,7 +571,7 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager 
*man,
return 0;
  
  error_free_blocks:

-	drm_buddy_free_list(mm, &vres->blocks);
+	drm_buddy_free_list(mm, &vres->blocks, 0);
	mutex_unlock(&mgr->lock);
  error_fini:
	ttm_resource_fini(man, &vres->base);
@@ -604,7 +604,7 @@ static void amdgpu_vram_mgr_del(struct ttm_resource_manager 
*man,
  
  	amdgpu_vram_mgr_do_reserve(man);
  
-	drm_buddy_free_list(mm, &vres->blocks);

+	drm_buddy_free_list(mm, &vres->blocks, 0);
	mutex_unlock(&mgr->lock);

  	atomic64_sub(vis_usage, &mgr->vis_usage);

@@ -912,7 +912,7 @@ void amdgpu_vram_mgr_fini(struct amdgpu_device *adev)
kfree(rsv);
  
  	list_for_each_entry_safe(rsv, temp, &mgr->reserved_pages, blocks) {

-	drm_buddy_free_list(&mgr->mm, &rsv->allocated);
+	drm_buddy_free_list(&mgr->mm, &rsv->allocated, 0);
kfree(rsv);
}
if (!adev->gmc.is_app_apu)
diff --git a/drivers/gpu/drm/drm_buddy.c b/drivers/gpu/drm/drm_buddy.c
index c4222b886db7..625a30a6b855 100644
--- a/drivers/gpu/drm/drm_buddy.c
+++ b/drivers/gpu/drm/drm_buddy.c
@@ -38,8 +38,8 @@ static void drm_block_free(struct drm_buddy *mm,
kmem_cache_free(slab_blocks, block);
  }
  
-static void list_insert_sorted(struct drm_buddy *mm,

-  struct drm_buddy_block *block)
+static void list_insert(struct drm_buddy *mm,
+   struct drm_buddy_block *block)
  {
struct drm_buddy_block *node;
struct list_head *head;
@@ -57,6 +57,16 @@ static void list_insert_sorted(struct drm_buddy *mm,
	__list_add(&block->link, node->link.prev, &node->link);
  }
  
+static void clear_reset(struct drm_buddy_block *block)

+{
+   block->header &= ~DRM_BUDDY_HEADER_CLEAR;
+}

Re: [PATCH v9 2/3] drm/amdgpu: Enable clear page functionality

2024-03-19 Thread Paneer Selvam, Arunpravin

Hi Christian,

On 3/19/2024 7:17 PM, Christian König wrote:

On 19.03.24 at 12:41, Paneer Selvam, Arunpravin wrote:

Hi Christian,

On 3/19/2024 3:58 PM, Christian König wrote:



On 18.03.24 at 22:40, Arunpravin Paneer Selvam wrote:

Add clear page support in vram memory region.

v1(Christian):
   - Don't handle clear page as a TTM flag, since when moving the BO back
 in from GTT again we don't need that.
   - Make a specialized version of amdgpu_fill_buffer() which only
 clears the VRAM areas which are not already cleared
   - Drop the TTM_PL_FLAG_WIPE_ON_RELEASE check in
 amdgpu_object.c

v2:
   - Modify the function name amdgpu_ttm_* (Alex)
   - Drop the delayed parameter (Christian)
   - handle amdgpu_res_cleared() just above the size
 calculation (Christian)
   - Use AMDGPU_GEM_CREATE_VRAM_WIPE_ON_RELEASE for clearing the 
buffers

 in the free path to properly wait for fences etc.. (Christian)

v3(Christian):
   - Remove buffer clear code in the VRAM manager; instead, change the
 AMDGPU_GEM_CREATE_VRAM_WIPE_ON_RELEASE handling to set
 the DRM_BUDDY_CLEARED flag.
   - Remove ! from amdgpu_res_cleared() check.

Signed-off-by: Arunpravin Paneer Selvam 


Suggested-by: Christian König 
Acked-by: Felix Kuehling 


Just a few nit picks below, but in general already looks good to me.


---
  drivers/gpu/drm/amd/amdgpu/amdgpu_object.c    | 22 ---
  .../gpu/drm/amd/amdgpu/amdgpu_res_cursor.h    | 25 
  drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c   | 61 
++-

  drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h   |  5 +-
  drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c  |  6 +-
  drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.h  |  5 ++
  6 files changed, 111 insertions(+), 13 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c

index 8bc79924d171..c92d92b28a57 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
@@ -39,6 +39,7 @@
  #include "amdgpu.h"
  #include "amdgpu_trace.h"
  #include "amdgpu_amdkfd.h"
+#include "amdgpu_vram_mgr.h"
    /**
   * DOC: amdgpu_object
@@ -601,8 +602,7 @@ int amdgpu_bo_create(struct amdgpu_device *adev,
  if (!amdgpu_bo_support_uswc(bo->flags))
  bo->flags &= ~AMDGPU_GEM_CREATE_CPU_GTT_USWC;
  -    if (adev->ras_enabled)
-    bo->flags |= AMDGPU_GEM_CREATE_VRAM_WIPE_ON_RELEASE;
+    bo->flags |= AMDGPU_GEM_CREATE_VRAM_WIPE_ON_RELEASE;
    bo->tbo.bdev = >mman.bdev;
  if (bp->domain & (AMDGPU_GEM_DOMAIN_GWS | AMDGPU_GEM_DOMAIN_OA |
@@ -632,15 +632,17 @@ int amdgpu_bo_create(struct amdgpu_device *adev,
    if (bp->flags & AMDGPU_GEM_CREATE_VRAM_CLEARED &&
  bo->tbo.resource->mem_type == TTM_PL_VRAM) {
-    struct dma_fence *fence;
+    struct dma_fence *fence = NULL;
-    r = amdgpu_fill_buffer(bo, 0, bo->tbo.base.resv, &fence, 
true);

+    r = amdgpu_ttm_clear_buffer(bo, bo->tbo.base.resv, &fence);
  if (unlikely(r))
  goto fail_unreserve;
  -    dma_resv_add_fence(bo->tbo.base.resv, fence,
-   DMA_RESV_USAGE_KERNEL);
-    dma_fence_put(fence);
+    if (fence) {
+    dma_resv_add_fence(bo->tbo.base.resv, fence,
+   DMA_RESV_USAGE_KERNEL);
+    dma_fence_put(fence);
+    }
  }
  if (!bp->resv)
  amdgpu_bo_unreserve(bo);
@@ -1365,8 +1367,12 @@ void amdgpu_bo_release_notify(struct 
ttm_buffer_object *bo)

  if (WARN_ON_ONCE(!dma_resv_trylock(bo->base.resv)))
  return;
-    r = amdgpu_fill_buffer(abo, AMDGPU_POISON, bo->base.resv, 
&fence, true);

+    r = amdgpu_fill_buffer(abo, 0, bo->base.resv, &fence, true);
  if (!WARN_ON(r)) {
+    struct amdgpu_vram_mgr_resource *vres;
+
+    vres = to_amdgpu_vram_mgr_resource(bo->resource);
+    vres->flags |= DRM_BUDDY_CLEARED;


Those lines should probably be in the VRAM manager.
This flag is set based on the amdgpu_fill_buffer() return value. Can we 
move the amdgpu_fill_buffer() call and the DRM_BUDDY_CLEARED flag setting 
into amdgpu_vram_mgr_del() in the VRAM manager, and would the VRAM buffers 
still need to be wiped here without setting DRM_BUDDY_CLEARED?


No, that won't work. The call to amdgpu_fill_buffer() *must* be here 
because it is made before the resource is actually freed.


Only setting the vres->flags should probably be moved into 
amdgpu_vram_mgr.c.


So instead of calling that manually here just make a function for it 
to keep the code related to the buddy allocator in one place.

I got it, Thanks.

Regards,
Arun.


Regards,
Christian.




  amdgpu_bo_fence(abo, fence, false);
  dma_fence_put(fence);
  }
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_res_cursor.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_res_cursor.h

index 381101d2bf05.

Re: [PATCH v9 2/3] drm/amdgpu: Enable clear page functionality

2024-03-19 Thread Paneer Selvam, Arunpravin

Hi Christian,

On 3/19/2024 3:58 PM, Christian König wrote:



On 18.03.24 at 22:40, Arunpravin Paneer Selvam wrote:

Add clear page support in vram memory region.

v1(Christian):
   - Don't handle clear page as a TTM flag, since when moving the BO back
 in from GTT again we don't need that.
   - Make a specialized version of amdgpu_fill_buffer() which only
 clears the VRAM areas which are not already cleared
   - Drop the TTM_PL_FLAG_WIPE_ON_RELEASE check in
 amdgpu_object.c

v2:
   - Modify the function name amdgpu_ttm_* (Alex)
   - Drop the delayed parameter (Christian)
   - handle amdgpu_res_cleared() just above the size
 calculation (Christian)
   - Use AMDGPU_GEM_CREATE_VRAM_WIPE_ON_RELEASE for clearing the buffers
 in the free path to properly wait for fences etc.. (Christian)

v3(Christian):
   - Remove buffer clear code in the VRAM manager; instead, change the
 AMDGPU_GEM_CREATE_VRAM_WIPE_ON_RELEASE handling to set
 the DRM_BUDDY_CLEARED flag.
   - Remove ! from amdgpu_res_cleared() check.

Signed-off-by: Arunpravin Paneer Selvam 


Suggested-by: Christian König 
Acked-by: Felix Kuehling 


Just a few nit picks below, but in general already looks good to me.


---
  drivers/gpu/drm/amd/amdgpu/amdgpu_object.c    | 22 ---
  .../gpu/drm/amd/amdgpu/amdgpu_res_cursor.h    | 25 
  drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c   | 61 ++-
  drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h   |  5 +-
  drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c  |  6 +-
  drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.h  |  5 ++
  6 files changed, 111 insertions(+), 13 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c

index 8bc79924d171..c92d92b28a57 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
@@ -39,6 +39,7 @@
  #include "amdgpu.h"
  #include "amdgpu_trace.h"
  #include "amdgpu_amdkfd.h"
+#include "amdgpu_vram_mgr.h"
    /**
   * DOC: amdgpu_object
@@ -601,8 +602,7 @@ int amdgpu_bo_create(struct amdgpu_device *adev,
  if (!amdgpu_bo_support_uswc(bo->flags))
  bo->flags &= ~AMDGPU_GEM_CREATE_CPU_GTT_USWC;
  -    if (adev->ras_enabled)
-    bo->flags |= AMDGPU_GEM_CREATE_VRAM_WIPE_ON_RELEASE;
+    bo->flags |= AMDGPU_GEM_CREATE_VRAM_WIPE_ON_RELEASE;
    bo->tbo.bdev = >mman.bdev;
  if (bp->domain & (AMDGPU_GEM_DOMAIN_GWS | AMDGPU_GEM_DOMAIN_OA |
@@ -632,15 +632,17 @@ int amdgpu_bo_create(struct amdgpu_device *adev,
    if (bp->flags & AMDGPU_GEM_CREATE_VRAM_CLEARED &&
  bo->tbo.resource->mem_type == TTM_PL_VRAM) {
-    struct dma_fence *fence;
+    struct dma_fence *fence = NULL;
-    r = amdgpu_fill_buffer(bo, 0, bo->tbo.base.resv, &fence, 
true);

+    r = amdgpu_ttm_clear_buffer(bo, bo->tbo.base.resv, &fence);
  if (unlikely(r))
  goto fail_unreserve;
  -    dma_resv_add_fence(bo->tbo.base.resv, fence,
-   DMA_RESV_USAGE_KERNEL);
-    dma_fence_put(fence);
+    if (fence) {
+    dma_resv_add_fence(bo->tbo.base.resv, fence,
+   DMA_RESV_USAGE_KERNEL);
+    dma_fence_put(fence);
+    }
  }
  if (!bp->resv)
  amdgpu_bo_unreserve(bo);
@@ -1365,8 +1367,12 @@ void amdgpu_bo_release_notify(struct 
ttm_buffer_object *bo)

  if (WARN_ON_ONCE(!dma_resv_trylock(bo->base.resv)))
  return;
-    r = amdgpu_fill_buffer(abo, AMDGPU_POISON, bo->base.resv, 
&fence, true);

+    r = amdgpu_fill_buffer(abo, 0, bo->base.resv, &fence, true);
  if (!WARN_ON(r)) {
+    struct amdgpu_vram_mgr_resource *vres;
+
+    vres = to_amdgpu_vram_mgr_resource(bo->resource);
+    vres->flags |= DRM_BUDDY_CLEARED;


Those lines should probably be in the VRAM manager.
This flag is set based on the amdgpu_fill_buffer() return value. Can we 
move the amdgpu_fill_buffer() call and the DRM_BUDDY_CLEARED flag setting 
into amdgpu_vram_mgr_del() in the VRAM manager, and would the VRAM buffers 
still need to be wiped here without setting DRM_BUDDY_CLEARED?



  amdgpu_bo_fence(abo, fence, false);
  dma_fence_put(fence);
  }
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_res_cursor.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_res_cursor.h

index 381101d2bf05..50fcd86e1033 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_res_cursor.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_res_cursor.h
@@ -164,4 +164,29 @@ static inline void amdgpu_res_next(struct 
amdgpu_res_cursor *cur, uint64_t size)

  }
  }
  +/**
+ * amdgpu_res_cleared - check if blocks are cleared
+ *
+ * @cur: the cursor to extract the block
+ *
+ * Check if the @cur block is cleared
+ */
+static inline bool amdgpu_res_cleared(struct amdgpu_res_cursor *cur)
+{
+    struct drm_buddy_block *block;
+
+    switch (cur->mem_type) {
+    case TTM_PL_VRAM:
+    block = cur->node;
+
+    if (!amdgpu_vram_mgr_is_cleared(block))
+    return false;

Re: [PATCH v8 1/3] drm/buddy: Implement tracking clear page feature

2024-03-07 Thread Paneer Selvam, Arunpravin

Hi Matthew,

On 3/6/2024 11:19 PM, Matthew Auld wrote:

On 04/03/2024 16:32, Arunpravin Paneer Selvam wrote:

- Add tracking clear page feature.

- The driver should enable the DRM_BUDDY_CLEARED flag if it
   successfully clears the blocks in the free path. On the other hand,
   DRM buddy marks each block as cleared.

- Track the available cleared pages size

- If the driver requests cleared memory we prefer cleared memory
   but fall back to uncleared if we can't find the cleared blocks.
   When the driver requests uncleared memory we try to use uncleared but
   fall back to cleared memory if necessary.

- When a block gets freed we clear it and mark the freed block as 
cleared,

   when there are buddies which are cleared as well we can merge them.
   Otherwise, we prefer to keep the blocks as separated.

- Add a function to support defragmentation.

v1:
   - Depends on the flag check DRM_BUDDY_CLEARED, enable the block as
 cleared. Else, reset the clear flag for each block in the 
list(Christian)

   - For merging the 2 cleared blocks compare as below,
 drm_buddy_is_clear(block) != drm_buddy_is_clear(buddy)(Christian)
   - Defragment the memory beginning from min_order
 till the required memory space is available.

v2: (Matthew)
   - Add a wrapper drm_buddy_free_list_internal for the freeing of 
blocks

 operation within drm buddy.
   - Write a macro block_incompatible() to allocate the required blocks.
   - Update the xe driver for the drm_buddy_free_list change in 
arguments.

   - add a warning if the two blocks are incompatible on
 defragmentation
   - call full defragmentation in the fini() function
   - place a condition to test if min_order is equal to 0
   - replace the list with safe_reverse() variant as we might
 remove the block from the list.

v3:
   - fix Gitlab user reported lockup issue.
   - Keep DRM_BUDDY_HEADER_CLEAR define sorted(Matthew)
   - modify to pass the root order instead max_order in fini()
 function(Matthew)
   - change bool 1 to true(Matthew)
   - add check if min_block_size is power of 2(Matthew)
   - modify the min_block_size datatype to u64(Matthew)

Signed-off-by: Arunpravin Paneer Selvam 


Signed-off-by: Matthew Auld 
Suggested-by: Christian König 
Suggested-by: Matthew Auld 


Is there a unit test for this? What about maybe something roughly like:

- Pick small random mm_size which is not always power-of-two.
- Allocate and free some random portion of the address space, 
returning those pages as non-dirty. Then do another cycle and randomly 
alternate between requesting dirty and non-dirty pages (not going over 
the limit you freed as non-dirty), putting that into two separate 
lists. Loop over both lists at the end checking that the dirty list is 
indeed all dirty pages and vice versa. Free it all again, keeping the 
dirty/clear status.
- Also try to go over the clear limit for some allocation. The 
allocation should never fail with reasonable page-size.
- Test the defrag/force_merge interface. Clean the mm or create new 
one. Intentionally fragment the address space, by creating two 
alternating lists. Free both lists, one as dirty the other as clean. 
Try to allocate double the previous size with matching min_page_size. 
Should fail. Call force_merge. Should now succeed. Also check that the 
page is always dirty after force_merge. Free the page as dirty, then 
repeat the whole thing, doubling the page-size until you hit max_order.
- Make sure we also call fini() with some part of the address space 
left as non-dirty. Should not trigger any warnings.


I think it would be good to consider, but not a blocker or anything. Some 
comments below, otherwise I think looks good.

Yes. It is good to have a unit test for this feature. I will send the patch.
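A rough sketch of the first scenario above as a KUnit case (hypothetical code against the drm_buddy API used in this series; the test that eventually landed may differ):

static void drm_test_buddy_alloc_clear_sketch(struct kunit *test)
{
	u64 mm_size = SZ_4M + SZ_2M; /* deliberately not a power of two */
	struct drm_buddy_block *block;
	LIST_HEAD(clean);
	struct drm_buddy mm;

	KUNIT_ASSERT_FALSE(test, drm_buddy_init(&mm, mm_size, SZ_4K));

	/* Allocate some space, then return it as cleared. */
	KUNIT_ASSERT_FALSE(test,
			   drm_buddy_alloc_blocks(&mm, 0, mm_size, SZ_1M,
						  SZ_4K, &clean, 0));
	drm_buddy_free_list(&mm, &clean, DRM_BUDDY_CLEARED);

	/* Ask for cleared memory; every returned block should be clear. */
	KUNIT_ASSERT_FALSE(test,
			   drm_buddy_alloc_blocks(&mm, 0, mm_size, SZ_1M,
						  SZ_4K, &clean,
						  DRM_BUDDY_CLEAR_ALLOCATION));
	list_for_each_entry(block, &clean, link)
		KUNIT_EXPECT_TRUE(test, drm_buddy_block_is_clear(block));

	drm_buddy_free_list(&mm, &clean, 0);
	drm_buddy_fini(&mm);
}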



---
  drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c  |   6 +-
  drivers/gpu/drm/drm_buddy.c   | 294 +++---
  drivers/gpu/drm/i915/i915_ttm_buddy_manager.c |   6 +-
  drivers/gpu/drm/tests/drm_buddy_test.c    |  18 +-
  drivers/gpu/drm/xe/xe_ttm_vram_mgr.c  |   4 +-
  include/drm/drm_buddy.h   |  22 +-
  6 files changed, 290 insertions(+), 60 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c

index 8db880244324..c0c851409241 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
@@ -571,7 +571,7 @@ static int amdgpu_vram_mgr_new(struct 
ttm_resource_manager *man,

  return 0;
    error_free_blocks:
-    drm_buddy_free_list(mm, &vres->blocks);
+    drm_buddy_free_list(mm, &vres->blocks, 0);
  mutex_unlock(&mgr->lock);
  error_fini:
  ttm_resource_fini(man, &vres->base);
@@ -604,7 +604,7 @@ static void amdgpu_vram_mgr_del(struct 
ttm_resource_manager *man,

    amdgpu_vram_mgr_do_reserve(man);
  -    drm_buddy_free_list(mm, &vres->blocks);
+    drm_buddy_free_list(mm, &vres->blocks, 0);
  mutex_unlock(&mgr->lock);
    atomic64_sub(vis_usage, &mgr->vis_usage);

Re: [PATCH v8 1/3] drm/buddy: Implement tracking clear page feature

2024-03-06 Thread Paneer Selvam, Arunpravin

Hi Matthew,
Ping?

Thanks,
Arun.

On 3/4/2024 10:02 PM, Arunpravin Paneer Selvam wrote:

- Add tracking clear page feature.

- Driver should enable the DRM_BUDDY_CLEARED flag if it
   successfully clears the blocks in the free path. On the other hand,
   DRM buddy marks each block as cleared.

- Track the available cleared pages size

- If the driver requests cleared memory we prefer cleared memory,
   but fall back to uncleared if we can't find cleared blocks.
   When the driver requests uncleared memory we try to use uncleared,
   but fall back to cleared memory if necessary.

- When a block gets freed we clear it and mark the freed block as cleared,
   when there are buddies which are cleared as well we can merge them.
   Otherwise, we prefer to keep the blocks as separated.

- Add a function to support defragmentation.

v1:
   - Depends on the flag check DRM_BUDDY_CLEARED, enable the block as
 cleared. Else, reset the clear flag for each block in the list(Christian)
   - For merging the 2 cleared blocks compare as below,
 drm_buddy_is_clear(block) != drm_buddy_is_clear(buddy)(Christian)
   - Defragment the memory beginning from min_order
 till the required memory space is available.

v2: (Matthew)
   - Add a wrapper drm_buddy_free_list_internal for the freeing of blocks
 operation within drm buddy.
   - Write a macro block_incompatible() to allocate the required blocks.
   - Update the xe driver for the drm_buddy_free_list change in arguments.
   - add a warning if the two blocks are incompatible on
 defragmentation
   - call full defragmentation in the fini() function
   - place a condition to test if min_order is equal to 0
   - replace the list with safe_reverse() variant as we might
 remove the block from the list.

v3:
   - fix Gitlab user reported lockup issue.
   - Keep DRM_BUDDY_HEADER_CLEAR define sorted(Matthew)
   - modify to pass the root order instead of max_order in fini()
 function(Matthew)
   - change bool 1 to true(Matthew)
   - add check if min_block_size is power of 2(Matthew)
   - modify the min_block_size datatype to u64(Matthew)

Signed-off-by: Arunpravin Paneer Selvam 
Signed-off-by: Matthew Auld 
Suggested-by: Christian König 
Suggested-by: Matthew Auld 
---
  drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c  |   6 +-
  drivers/gpu/drm/drm_buddy.c   | 294 +++---
  drivers/gpu/drm/i915/i915_ttm_buddy_manager.c |   6 +-
  drivers/gpu/drm/tests/drm_buddy_test.c|  18 +-
  drivers/gpu/drm/xe/xe_ttm_vram_mgr.c  |   4 +-
  include/drm/drm_buddy.h   |  22 +-
  6 files changed, 290 insertions(+), 60 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
index 8db880244324..c0c851409241 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
@@ -571,7 +571,7 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager 
*man,
return 0;
  
  error_free_blocks:

-   drm_buddy_free_list(mm, &vres->blocks);
+   drm_buddy_free_list(mm, &vres->blocks, 0);
	mutex_unlock(&mgr->lock);
  error_fini:
	ttm_resource_fini(man, &vres->base);
@@ -604,7 +604,7 @@ static void amdgpu_vram_mgr_del(struct ttm_resource_manager 
*man,
  
  	amdgpu_vram_mgr_do_reserve(man);
  
-	drm_buddy_free_list(mm, &vres->blocks);

+   drm_buddy_free_list(mm, &vres->blocks, 0);
	mutex_unlock(&mgr->lock);
  
	atomic64_sub(vis_usage, &mgr->vis_usage);

@@ -912,7 +912,7 @@ void amdgpu_vram_mgr_fini(struct amdgpu_device *adev)
kfree(rsv);
  
	list_for_each_entry_safe(rsv, temp, &mgr->reserved_pages, blocks) {

-   drm_buddy_free_list(&mgr->mm, &rsv->allocated);
+   drm_buddy_free_list(&mgr->mm, &rsv->allocated, 0);
kfree(rsv);
}
if (!adev->gmc.is_app_apu)
diff --git a/drivers/gpu/drm/drm_buddy.c b/drivers/gpu/drm/drm_buddy.c
index c4222b886db7..40131ed9b0cd 100644
--- a/drivers/gpu/drm/drm_buddy.c
+++ b/drivers/gpu/drm/drm_buddy.c
@@ -57,6 +57,16 @@ static void list_insert_sorted(struct drm_buddy *mm,
	__list_add(&block->link, node->link.prev, &node->link);
  }
  
+static void clear_reset(struct drm_buddy_block *block)

+{
+   block->header &= ~DRM_BUDDY_HEADER_CLEAR;
+}
+
+static void mark_cleared(struct drm_buddy_block *block)
+{
+   block->header |= DRM_BUDDY_HEADER_CLEAR;
+}
+
  static void mark_allocated(struct drm_buddy_block *block)
  {
block->header &= ~DRM_BUDDY_HEADER_STATE;
@@ -186,11 +196,21 @@ EXPORT_SYMBOL(drm_buddy_init);
   */
  void drm_buddy_fini(struct drm_buddy *mm)
  {
+   u64 root_size, size;
+   unsigned int order;
int i;
  
+	size = mm->size;

+
for (i = 0; i < mm->n_roots; ++i) {
+   order = ilog2(size) - ilog2(mm->chunk_size);
+   root_size = mm->chunk_size << order;
+   drm_buddy_defrag(mm, root_size);
+
WARN_ON(!drm_buddy_block_is_free(mm->roots[i]));
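For a non-power-of-two VRAM size the loop above recomputes the order per root. Illustrative arithmetic (this assumes size is reduced by each root's size on every iteration, which the quoted hunk cuts off):

	u64 size = SZ_1G * 3;               /* mm->size, chunk_size = SZ_4K */
	unsigned int order;

	order = ilog2(size) - ilog2(SZ_4K); /* 31 - 12 = 19: 4K << 19 = 2 GiB */
	size -= SZ_2G;                      /* first root handled */
	order = ilog2(size) - ilog2(SZ_4K); /* 30 - 12 = 18: 4K << 18 = 1 GiB */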

Re: [PATCH v8 3/3] drm/buddy: Add user for defragmentation

2024-03-06 Thread Paneer Selvam, Arunpravin

Hi Christian,

On 3/5/2024 5:41 PM, Christian König wrote:

Am 05.03.24 um 12:14 schrieb Paneer Selvam, Arunpravin:

On 3/5/2024 4:33 PM, Paneer Selvam, Arunpravin wrote:

Hi Christian,

On 3/4/2024 10:09 PM, Christian König wrote:

Am 04.03.24 um 17:32 schrieb Arunpravin Paneer Selvam:

Add amdgpu driver as user for the drm buddy
defragmentation.

Signed-off-by: Arunpravin Paneer Selvam 


---
  drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c | 17 +++--
  drivers/gpu/drm/drm_buddy.c  |  1 +
  2 files changed, 16 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c

index e494f5bf136a..cff8a526c622 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
@@ -533,8 +533,21 @@ static int amdgpu_vram_mgr_new(struct 
ttm_resource_manager *man,

 min_block_size,
 &vres->blocks,
 vres->flags);
-    if (unlikely(r))
-    goto error_free_blocks;
+    if (unlikely(r)) {
+    if (r == -ENOSPC) {
+    drm_buddy_defrag(mm, min_block_size);
+    r = drm_buddy_alloc_blocks(mm, fpfn,
+   lpfn,
+   size,
+   min_block_size,
+   &vres->blocks,
+   vres->flags);


That doesn't look like something we should do.

We might fall back when contiguous memory is requested, but 
certainly not on normal allocation failure.
Yes, defrag here is not useful for normal allocations. But I am worried 
about normal allocations with a bigger min_block_size.
In such cases, I think we should move this drm_buddy_defrag() call 
into the buddy allocator file. For example, if the required
size is 1024KiB and min_block_size is 256KiB, the allocator first 
tries to find a 1024KiB block; when there is no single 1024KiB block,
the allocator goes one level below in the freelist and searches 
for two 512KiB blocks, and so on. At some point, if space is 
tight,
we might go further levels down and search for four 256KiB blocks to 
satisfy the request.


If the allocator cannot find the first 256KiB block, I think we 
might then need to merge two 128KiB blocks
through the defragmentation function. And again for the second 256KiB 
block, we might need to call the defragmentation again to
merge two 128KiB blocks or four 64KiB blocks to form the minimum 
alignment size of 256KiB. This goes on for the third and fourth
256KiB blocks to complete the required 1024KiB allocation. 
Please let me know if my understanding is not correct.
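In order terms, the walk described here looks like this (illustrative numbers only, not code from the patch):

	unsigned int order = ilog2(SZ_1M) - ilog2(SZ_4K);       /* 8 */
	unsigned int min_order = ilog2(SZ_256K) - ilog2(SZ_4K); /* 6 */

	/* The allocator tries order 8 (one 1024KiB block) first, then
	 * order 7 (two 512KiB blocks), then order 6 (four 256KiB blocks).
	 * It may not go below min_order, so free order-5 buddies would
	 * first have to be merged back up to order 6, which is where the
	 * proposed defragmentation step would come in.
	 */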


I don't think we should do that. We essentially have to support two 
different use cases:


1. Non contiguous allocation with 2MiB min_block_size for everything 
larger than 2MiB. Using a block size as large as possible is 
desirable, but not something we enforce.


2. Contiguous allocations for display, firmware etc. Here we need to 
enforce a large block size and can live with the additional overhead 
caused by force merging.

Thanks. I will make the changes accordingly.
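A sketch of the two resulting call patterns (flag name as introduced later in this series; not a quote from the patch):

	/* 1. Normal allocation: ask for large (2MiB) pieces, nothing enforced. */
	r = drm_buddy_alloc_blocks(mm, fpfn, lpfn, size, SZ_2M,
				   &vres->blocks, 0);

	/* 2. Display/firmware: one contiguous block; the allocator may
	 *    force-merge internally and eat the extra overhead.
	 */
	r = drm_buddy_alloc_blocks(mm, fpfn, lpfn, size, size,
				   &vres->blocks,
				   DRM_BUDDY_CONTIGUOUS_ALLOCATION);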




As you have suggested we can also rename this as force merge or some 
other name.


Yeah, but that was just a suggestion. You are way deeper in the code and 
handling than I am, so feel free to name it whatever you think fits best.

Sure :)

Thanks,
Arun.


Regards,
Christian.




Thanks,
Arun.


Thanks,
Arun.


Regards,
Christian.


+    if (unlikely(r))
+    goto error_free_blocks;
+    } else {
+    goto error_free_blocks;
+    }
+    }
    if (size > remaining_size)
  remaining_size = 0;
diff --git a/drivers/gpu/drm/drm_buddy.c 
b/drivers/gpu/drm/drm_buddy.c

index 40131ed9b0cd..19440f8caec0 100644
--- a/drivers/gpu/drm/drm_buddy.c
+++ b/drivers/gpu/drm/drm_buddy.c
@@ -396,6 +396,7 @@ void drm_buddy_defrag(struct drm_buddy *mm,
  }
  }
  }
+EXPORT_SYMBOL(drm_buddy_defrag);
    /**
   * drm_buddy_free_block - free a block












Re: [PATCH v8 3/3] drm/buddy: Add user for defragmentation

2024-03-05 Thread Paneer Selvam, Arunpravin




On 3/5/2024 4:33 PM, Paneer Selvam, Arunpravin wrote:

Hi Christian,

On 3/4/2024 10:09 PM, Christian König wrote:

Am 04.03.24 um 17:32 schrieb Arunpravin Paneer Selvam:

Add amdgpu driver as user for the drm buddy
defragmentation.

Signed-off-by: Arunpravin Paneer Selvam 


---
  drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c | 17 +++--
  drivers/gpu/drm/drm_buddy.c  |  1 +
  2 files changed, 16 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c

index e494f5bf136a..cff8a526c622 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
@@ -533,8 +533,21 @@ static int amdgpu_vram_mgr_new(struct 
ttm_resource_manager *man,

 min_block_size,
 &vres->blocks,
 vres->flags);
-    if (unlikely(r))
-    goto error_free_blocks;
+    if (unlikely(r)) {
+    if (r == -ENOSPC) {
+    drm_buddy_defrag(mm, min_block_size);
+    r = drm_buddy_alloc_blocks(mm, fpfn,
+   lpfn,
+   size,
+   min_block_size,
+   &vres->blocks,
+   vres->flags);


That doesn't look like something we should do.

We might fall back when contiguous memory is requested, but certainly 
not on normal allocation failure.
Yes, defrag here is not useful for normal allocations. But I am worried 
about normal allocations with a bigger min_block_size.
In such cases, I think we should move this drm_buddy_defrag() call 
into the buddy allocator file. For example, if the required
size is 1024KiB and min_block_size is 256KiB, the allocator first 
tries to find a 1024KiB block; when there is no single 1024KiB block,
the allocator goes one level below in the freelist and searches for 
two 512KiB blocks, and so on. At some point, if space is 
tight,
we might go further levels down and search for four 256KiB blocks to 
satisfy the request.


If the allocator cannot find the first 256KiB block, I think we 
might then need to merge two 128KiB blocks
through the defragmentation function. And again for the second 256KiB 
block, we might need to call the defragmentation again to
merge two 128KiB blocks or four 64KiB blocks to form the minimum alignment 
size of 256KiB. This goes on for the third and fourth
256KiB blocks to complete the required 1024KiB allocation. 
Please let me know if my understanding is not correct.


As you have suggested we can also rename this as force merge or some 
other name.


Thanks,
Arun.


Thanks,
Arun.


Regards,
Christian.


+    if (unlikely(r))
+    goto error_free_blocks;
+    } else {
+    goto error_free_blocks;
+    }
+    }
    if (size > remaining_size)
  remaining_size = 0;
diff --git a/drivers/gpu/drm/drm_buddy.c b/drivers/gpu/drm/drm_buddy.c
index 40131ed9b0cd..19440f8caec0 100644
--- a/drivers/gpu/drm/drm_buddy.c
+++ b/drivers/gpu/drm/drm_buddy.c
@@ -396,6 +396,7 @@ void drm_buddy_defrag(struct drm_buddy *mm,
  }
  }
  }
+EXPORT_SYMBOL(drm_buddy_defrag);
    /**
   * drm_buddy_free_block - free a block








Re: [PATCH v8 3/3] drm/buddy: Add user for defragmentation

2024-03-05 Thread Paneer Selvam, Arunpravin

Hi Christian,

On 3/4/2024 10:09 PM, Christian König wrote:

Am 04.03.24 um 17:32 schrieb Arunpravin Paneer Selvam:

Add amdgpu driver as user for the drm buddy
defragmentation.

Signed-off-by: Arunpravin Paneer Selvam 


---
  drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c | 17 +++--
  drivers/gpu/drm/drm_buddy.c  |  1 +
  2 files changed, 16 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c

index e494f5bf136a..cff8a526c622 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
@@ -533,8 +533,21 @@ static int amdgpu_vram_mgr_new(struct 
ttm_resource_manager *man,

 min_block_size,
 &vres->blocks,
 vres->flags);
-    if (unlikely(r))
-    goto error_free_blocks;
+    if (unlikely(r)) {
+    if (r == -ENOSPC) {
+    drm_buddy_defrag(mm, min_block_size);
+    r = drm_buddy_alloc_blocks(mm, fpfn,
+   lpfn,
+   size,
+   min_block_size,
+   &vres->blocks,
+   vres->flags);


That doesn't look like something we should do.

We might fall back when contiguous memory is requested, but certainly 
not on normal allocation failure.
Yes, defrag here is not useful for normal allocations. But I am worried 
about normal allocations with a bigger min_block_size.
In such cases, I think we should move this drm_buddy_defrag() call into 
the buddy allocator file. For example, if the required
size is 1024KiB and min_block_size is 256KiB, the allocator first 
tries to find a 1024KiB block; when there is no single 1024KiB block,
the allocator goes one level below in the freelist and searches for 
two 512KiB blocks, and so on. At some point, if space is tight,
we might go further levels down and search for four 256KiB blocks to satisfy 
the request.


If the allocator cannot find the first 256KiB block, I think we might 
then need to merge two 128KiB blocks
through the defragmentation function. And again for the second 256KiB block, 
we might need to call the defragmentation again to
merge two 128KiB blocks or four 64KiB blocks to form the minimum alignment 
size of 256KiB. This goes on for the third and fourth
256KiB blocks to complete the required 1024KiB allocation. 
Please let me know if my understanding is not correct.


Thanks,
Arun.


Regards,
Christian.


+    if (unlikely(r))
+    goto error_free_blocks;
+    } else {
+    goto error_free_blocks;
+    }
+    }
    if (size > remaining_size)
  remaining_size = 0;
diff --git a/drivers/gpu/drm/drm_buddy.c b/drivers/gpu/drm/drm_buddy.c
index 40131ed9b0cd..19440f8caec0 100644
--- a/drivers/gpu/drm/drm_buddy.c
+++ b/drivers/gpu/drm/drm_buddy.c
@@ -396,6 +396,7 @@ void drm_buddy_defrag(struct drm_buddy *mm,
  }
  }
  }
+EXPORT_SYMBOL(drm_buddy_defrag);
    /**
   * drm_buddy_free_block - free a block






Re: [PATCH v7 3/3] drm/buddy: Add defragmentation support

2024-03-04 Thread Paneer Selvam, Arunpravin

Hi Matthew,

On 2/22/2024 12:12 AM, Matthew Auld wrote:

On 21/02/2024 12:18, Arunpravin Paneer Selvam wrote:

Add a function to support defragmentation.

v1:
   - Defragment the memory beginning from min_order
 till the required memory space is available.

v2(Matthew):
   - add amdgpu user for defragmentation
   - add a warning if the two blocks are incompatible on
 defragmentation
   - call full defragmentation in the fini() function
   - place a condition to test if min_order is equal to 0
   - replace the list with safe_reverse() variant as we might
 remove the block from the list.

Signed-off-by: Arunpravin Paneer Selvam 


Suggested-by: Matthew Auld 
---
  drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c | 17 +++-
  drivers/gpu/drm/drm_buddy.c  | 93 +---
  include/drm/drm_buddy.h  |  3 +
  3 files changed, 97 insertions(+), 16 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c

index e494f5bf136a..cff8a526c622 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
@@ -533,8 +533,21 @@ static int amdgpu_vram_mgr_new(struct 
ttm_resource_manager *man,

 min_block_size,
 &vres->blocks,
 vres->flags);
-    if (unlikely(r))
-    goto error_free_blocks;
+    if (unlikely(r)) {
+    if (r == -ENOSPC) {
+    drm_buddy_defrag(mm, min_block_size);
+    r = drm_buddy_alloc_blocks(mm, fpfn,
+   lpfn,
+   size,
+   min_block_size,
+   &vres->blocks,
+   vres->flags);
+    if (unlikely(r))
+    goto error_free_blocks;
+    } else {
+    goto error_free_blocks;
+    }
+    }
    if (size > remaining_size)
  remaining_size = 0;
diff --git a/drivers/gpu/drm/drm_buddy.c b/drivers/gpu/drm/drm_buddy.c
index 18e004fa39d3..56bd1560fbcd 100644
--- a/drivers/gpu/drm/drm_buddy.c
+++ b/drivers/gpu/drm/drm_buddy.c
@@ -203,6 +203,8 @@ void drm_buddy_fini(struct drm_buddy *mm)
  drm_block_free(mm, mm->roots[i]);
  }
  +    drm_buddy_defrag(mm, mm->chunk_size << mm->max_order);


I think this needs to be called higher up, otherwise we blow up with 
the WARN, plus we just freed the root(s). There is also the case with 
non-power-of-two VRAM size, in which case you get multiple roots and 
max_order is just the largest root and not the entire address space. I 
guess do this in the loop above and use the root order instead?


Also this should be done as part of the first patch and then in this 
patch it is just a case of exporting it. Every commit should ideally 
be functional by itself.
You mean we should move the above drm_buddy_fini() change and the 
drm_buddy_defrag() function into the first patch,
and in this patch just add the export and the amdgpu user. Is my 
understanding correct?


Thanks,
Arun.



+
  WARN_ON(mm->avail != mm->size);
    kfree(mm->roots);
@@ -276,25 +278,39 @@ drm_get_buddy(struct drm_buddy_block *block)
  }
  EXPORT_SYMBOL(drm_get_buddy);
  -static void __drm_buddy_free(struct drm_buddy *mm,
- struct drm_buddy_block *block)
+static unsigned int __drm_buddy_free(struct drm_buddy *mm,
+ struct drm_buddy_block *block,
+ bool defrag)
  {
+    unsigned int order, block_order;
  struct drm_buddy_block *parent;
  +    block_order = drm_buddy_block_order(block);
+
  while ((parent = block->parent)) {
-    struct drm_buddy_block *buddy;
+    struct drm_buddy_block *buddy = NULL;
    buddy = __get_buddy(block);
    if (!drm_buddy_block_is_free(buddy))
  break;
  -    if (drm_buddy_block_is_clear(block) !=
-    drm_buddy_block_is_clear(buddy))
-    break;
+    if (!defrag) {
+    /*
+ * Check the block and its buddy clear state and exit
+ * the loop if they have dissimilar states.
+ */
+    if (drm_buddy_block_is_clear(block) !=
+    drm_buddy_block_is_clear(buddy))
+    break;
  -    if (drm_buddy_block_is_clear(block))
-    mark_cleared(parent);
+    if (drm_buddy_block_is_clear(block))
+    mark_cleared(parent);
+    }
+
+    WARN_ON(defrag &&
+    (drm_buddy_block_is_clear(block) ==
+ drm_buddy_block_is_clear(buddy)));
    list_del(&buddy->link);
  @@ -304,8 +320,57 @@ static void __drm_buddy_free(struct drm_buddy 
*mm,

  block = parent;
  }
  -    mark_free(mm, block);
+    order = drm_buddy_block_order(block);
+    if (block_order != order)
+    mark_free(mm, block);
+
+    return order;
+}
+
+/**
+ * 
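The kerneldoc above is cut off by the archive. Conceptually, the defrag entry point built on this helper walks the low-order free lists and force-merges free buddies; roughly (a sketch of the idea, not the exact patch body):

void drm_buddy_defrag(struct drm_buddy *mm, u64 min_block_size)
{
	unsigned int min_order = ilog2(min_block_size) - ilog2(mm->chunk_size);
	struct drm_buddy_block *block, *tmp, *buddy;
	unsigned int order;

	for (order = 0; order < min_order; order++) {
		list_for_each_entry_safe_reverse(block, tmp,
						 &mm->free_list[order], link) {
			if (!block->parent)
				continue;

			buddy = __get_buddy(block);
			if (!drm_buddy_block_is_free(buddy))
				continue;

			/* Merge upwards even when a dirty and a cleared
			 * buddy meet, which the normal free path refuses.
			 */
			__drm_buddy_free(mm, block, true);
		}
	}
}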

Re: [PATCH v6 1/3] drm/buddy: Implement tracking clear page feature

2024-02-21 Thread Paneer Selvam, Arunpravin



On 2/16/2024 5:33 PM, Matthew Auld wrote:

On 08/02/2024 15:49, Arunpravin Paneer Selvam wrote:

- Add tracking clear page feature.

- Driver should enable the DRM_BUDDY_CLEARED flag if it
   successfully clears the blocks in the free path. On the other hand,
   DRM buddy marks each block as cleared.

- Track the available cleared pages size

- If the driver requests cleared memory we prefer cleared memory,
   but fall back to uncleared if we can't find cleared blocks.
   When the driver requests uncleared memory we try to use uncleared,
   but fall back to cleared memory if necessary.

- When a block gets freed we clear it and mark the freed block as 
cleared,

   when there are buddies which are cleared as well we can merge them.
   Otherwise, we prefer to keep the blocks as separated.

v1: (Christian)
   - Depends on the flag check DRM_BUDDY_CLEARED, enable the block as
 cleared. Else, reset the clear flag for each block in the list.

   - For merging the 2 cleared blocks compare as below,
 drm_buddy_is_clear(block) != drm_buddy_is_clear(buddy)

v2: (Matthew)
   - Add a wrapper drm_buddy_free_list_internal for the freeing of 
blocks

 operation within drm buddy.
   - Write a macro block_incompatible() to allocate the required blocks.
   - Update the xe driver for the drm_buddy_free_list change in 
arguments.


Signed-off-by: Arunpravin Paneer Selvam 


Signed-off-by: Matthew Auld 
Suggested-by: Christian König 


Probably needs a new unit test.

I think we are missing something to forcefully re-merge everything at 
fini()? In theory we can just call the defrag routine. Otherwise we 
might trigger various warnings since the root(s) might still be split.


Also one nit below. Otherwise I think looks good.


---
  drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c  |   6 +-
  drivers/gpu/drm/drm_buddy.c   | 192 ++
  drivers/gpu/drm/i915/i915_ttm_buddy_manager.c |   6 +-
  drivers/gpu/drm/tests/drm_buddy_test.c    |  10 +-
  drivers/gpu/drm/xe/xe_ttm_vram_mgr.c  |   4 +-
  include/drm/drm_buddy.h   |  18 +-
  6 files changed, 187 insertions(+), 49 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c

index 8db880244324..c0c851409241 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
@@ -571,7 +571,7 @@ static int amdgpu_vram_mgr_new(struct 
ttm_resource_manager *man,

  return 0;
    error_free_blocks:
-    drm_buddy_free_list(mm, &vres->blocks);
+    drm_buddy_free_list(mm, &vres->blocks, 0);
  mutex_unlock(&mgr->lock);
  error_fini:
  ttm_resource_fini(man, &vres->base);
@@ -604,7 +604,7 @@ static void amdgpu_vram_mgr_del(struct 
ttm_resource_manager *man,

    amdgpu_vram_mgr_do_reserve(man);
  -    drm_buddy_free_list(mm, &vres->blocks);
+    drm_buddy_free_list(mm, &vres->blocks, 0);
  mutex_unlock(&mgr->lock);
    atomic64_sub(vis_usage, &mgr->vis_usage);
@@ -912,7 +912,7 @@ void amdgpu_vram_mgr_fini(struct amdgpu_device 
*adev)

  kfree(rsv);
    list_for_each_entry_safe(rsv, temp, &mgr->reserved_pages, blocks) {

-    drm_buddy_free_list(&mgr->mm, &rsv->allocated);
+    drm_buddy_free_list(&mgr->mm, &rsv->allocated, 0);
  kfree(rsv);
  }
  if (!adev->gmc.is_app_apu)
diff --git a/drivers/gpu/drm/drm_buddy.c b/drivers/gpu/drm/drm_buddy.c
index f57e6d74fb0e..33ad0cfbd54c 100644
--- a/drivers/gpu/drm/drm_buddy.c
+++ b/drivers/gpu/drm/drm_buddy.c
@@ -57,6 +57,16 @@ static void list_insert_sorted(struct drm_buddy *mm,
  __list_add(&block->link, node->link.prev, &node->link);
  }
  +static void clear_reset(struct drm_buddy_block *block)
+{
+    block->header &= ~DRM_BUDDY_HEADER_CLEAR;
+}
+
+static void mark_cleared(struct drm_buddy_block *block)
+{
+    block->header |= DRM_BUDDY_HEADER_CLEAR;
+}
+
  static void mark_allocated(struct drm_buddy_block *block)
  {
  block->header &= ~DRM_BUDDY_HEADER_STATE;
@@ -223,6 +233,12 @@ static int split_block(struct drm_buddy *mm,
  mark_free(mm, block->left);
  mark_free(mm, block->right);
  +    if (drm_buddy_block_is_clear(block)) {
+    mark_cleared(block->left);
+    mark_cleared(block->right);
+    clear_reset(block);
+    }
+
  mark_split(block);
    return 0;
@@ -273,6 +289,13 @@ static void __drm_buddy_free(struct drm_buddy *mm,
  if (!drm_buddy_block_is_free(buddy))
  break;
  +    if (drm_buddy_block_is_clear(block) !=
+    drm_buddy_block_is_clear(buddy))
+    break;
+
+    if (drm_buddy_block_is_clear(block))
+    mark_cleared(parent);
+
  list_del(&buddy->link);
    drm_block_free(mm, block);
@@ -295,26 +318,61 @@ void drm_buddy_free_block(struct drm_buddy *mm,
  {
  BUG_ON(!drm_buddy_block_is_allocated(block));
  mm->avail += drm_buddy_block_size(mm, block);
+    if (drm_buddy_block_is_clear(block))
+    mm->clear_avail += drm_buddy_block_size(mm, block);
+
  

Re: [PATCH v6 1/3] drm/buddy: Implement tracking clear page feature

2024-02-21 Thread Paneer Selvam, Arunpravin



On 2/16/2024 5:33 PM, Matthew Auld wrote:

On 08/02/2024 15:49, Arunpravin Paneer Selvam wrote:

- Add tracking clear page feature.

- Driver should enable the DRM_BUDDY_CLEARED flag if it
   successfully clears the blocks in the free path. On the other hand,
   DRM buddy marks each block as cleared.

- Track the available cleared pages size

- If the driver requests cleared memory we prefer cleared memory,
   but fall back to uncleared if we can't find cleared blocks.
   When the driver requests uncleared memory we try to use uncleared,
   but fall back to cleared memory if necessary.

- When a block gets freed we clear it and mark the freed block as 
cleared,

   when there are buddies which are cleared as well we can merge them.
   Otherwise, we prefer to keep the blocks as separated.

v1: (Christian)
   - Depends on the flag check DRM_BUDDY_CLEARED, enable the block as
 cleared. Else, reset the clear flag for each block in the list.

   - For merging the 2 cleared blocks compare as below,
 drm_buddy_is_clear(block) != drm_buddy_is_clear(buddy)

v2: (Matthew)
   - Add a wrapper drm_buddy_free_list_internal for the freeing of 
blocks

 operation within drm buddy.
   - Write a macro block_incompatible() to allocate the required blocks.
   - Update the xe driver for the drm_buddy_free_list change in 
arguments.


Signed-off-by: Arunpravin Paneer Selvam 


Signed-off-by: Matthew Auld 
Suggested-by: Christian König 


Probably needs a new unit test.

Sure, I am working on it. I will send in a separate patch.


I think we are missing something to forcefully re-merge everything at 
fini()? In theory we can just call the defrag routine. Otherwise we 
might trigger various warnings since the root(s) might still be split.


I have added the full defrag in the fini() function. Please review the 
patch number 3.


Thanks,

Arun.



Also one nit below. Otherwise I think looks good.


---
  drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c  |   6 +-
  drivers/gpu/drm/drm_buddy.c   | 192 ++
  drivers/gpu/drm/i915/i915_ttm_buddy_manager.c |   6 +-
  drivers/gpu/drm/tests/drm_buddy_test.c    |  10 +-
  drivers/gpu/drm/xe/xe_ttm_vram_mgr.c  |   4 +-
  include/drm/drm_buddy.h   |  18 +-
  6 files changed, 187 insertions(+), 49 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c

index 8db880244324..c0c851409241 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
@@ -571,7 +571,7 @@ static int amdgpu_vram_mgr_new(struct 
ttm_resource_manager *man,

  return 0;
    error_free_blocks:
-    drm_buddy_free_list(mm, &vres->blocks);
+    drm_buddy_free_list(mm, &vres->blocks, 0);
  mutex_unlock(&mgr->lock);
  error_fini:
  ttm_resource_fini(man, &vres->base);
@@ -604,7 +604,7 @@ static void amdgpu_vram_mgr_del(struct 
ttm_resource_manager *man,

    amdgpu_vram_mgr_do_reserve(man);
  -    drm_buddy_free_list(mm, &vres->blocks);
+    drm_buddy_free_list(mm, &vres->blocks, 0);
  mutex_unlock(&mgr->lock);
    atomic64_sub(vis_usage, &mgr->vis_usage);
@@ -912,7 +912,7 @@ void amdgpu_vram_mgr_fini(struct amdgpu_device 
*adev)

  kfree(rsv);
    list_for_each_entry_safe(rsv, temp, &mgr->reserved_pages, blocks) {

-    drm_buddy_free_list(&mgr->mm, &rsv->allocated);
+    drm_buddy_free_list(&mgr->mm, &rsv->allocated, 0);
  kfree(rsv);
  }
  if (!adev->gmc.is_app_apu)
diff --git a/drivers/gpu/drm/drm_buddy.c b/drivers/gpu/drm/drm_buddy.c
index f57e6d74fb0e..33ad0cfbd54c 100644
--- a/drivers/gpu/drm/drm_buddy.c
+++ b/drivers/gpu/drm/drm_buddy.c
@@ -57,6 +57,16 @@ static void list_insert_sorted(struct drm_buddy *mm,
  __list_add(&block->link, node->link.prev, &node->link);
  }
  +static void clear_reset(struct drm_buddy_block *block)
+{
+    block->header &= ~DRM_BUDDY_HEADER_CLEAR;
+}
+
+static void mark_cleared(struct drm_buddy_block *block)
+{
+    block->header |= DRM_BUDDY_HEADER_CLEAR;
+}
+
  static void mark_allocated(struct drm_buddy_block *block)
  {
  block->header &= ~DRM_BUDDY_HEADER_STATE;
@@ -223,6 +233,12 @@ static int split_block(struct drm_buddy *mm,
  mark_free(mm, block->left);
  mark_free(mm, block->right);
  +    if (drm_buddy_block_is_clear(block)) {
+    mark_cleared(block->left);
+    mark_cleared(block->right);
+    clear_reset(block);
+    }
+
  mark_split(block);
    return 0;
@@ -273,6 +289,13 @@ static void __drm_buddy_free(struct drm_buddy *mm,
  if (!drm_buddy_block_is_free(buddy))
  break;
  +    if (drm_buddy_block_is_clear(block) !=
+    drm_buddy_block_is_clear(buddy))
+    break;
+
+    if (drm_buddy_block_is_clear(block))
+    mark_cleared(parent);
+
  list_del(&buddy->link);
    drm_block_free(mm, block);
@@ -295,26 +318,61 @@ void drm_buddy_free_block(struct drm_buddy *mm,
  {
  BUG_ON(!drm_buddy_block_is_allocated(block));
  

RE: [PATCH] [RFC] drm/drm_buddy fails to initialize on 32-bit architectures

2023-03-09 Thread Paneer Selvam, Arunpravin
[AMD Official Use Only - General]

Hi Luis,

Sorry, I missed this one. Give me some time. I will check on it.

Regards,
Arun
-Original Message-
From: Luís Mendes  
Sent: Thursday, March 9, 2023 3:43 PM
To: Koenig, Christian 
Cc: a...@linux-foundation.org; amd-gfx list ; 
Linux Kernel Mailing List ; Paneer Selvam, 
Arunpravin 
Subject: Re: [PATCH] [RFC] drm/drm_buddy fails to initialize on 32-bit 
architectures

Hi,

Ping? This is actually a regression.
If there is no one available to work on this, maybe I can have a look in my spare 
time, in accordance with your suggestion.

Regards,
Luís

On Tue, Jan 3, 2023 at 8:44 AM Christian König  wrote:
>
> Am 25.12.22 um 20:39 schrieb Luís Mendes:
> > Re-sending with the correct  linux-kernel mailing list email address.
> > Sorry for the inconvenience.
> >
> > The proposed patch fixes the issue and allows amdgpu to work again 
> > on armhf with an AMD RX 550 card, however it may not be the best 
> > solution for the issue, as detailed below.
> >
> > include/log2.h defined macros rounddown_pow_of_two(...) and
> > roundup_pow_of_two(...) do not handle 64-bit values on 32-bit 
> > architectures (tested on an armv9 armhf machine), causing
> > drm_buddy_init(...) to fail on a BUG_ON with an underflow on the order 
> > value, thus preventing amdgpu from loading properly (no GUI).
> >
> > One option is to modify rounddown_pow_of_two(...) to detect if the 
> > variable takes 32 bits or less and call 
> > __rounddown_pow_of_two_u32(u32
> > n) or if the variable takes more space than 32 bits, then call
> > __rounddown_pow_of_two_u64(u64 n). This would imply renaming 
> > __rounddown_pow_of_two(unsigned long n) to
> > __rounddown_pow_of_two_u32(u32 n) and adding a new function
> > __rounddown_pow_of_two_u64(u64 n). This would be the most 
> > transparent solution, however there are a few complications, and they are:
> > - that the mm subsystem will fail to link on armhf with an undefined 
> > reference to __aeabi_uldivmod
> > - there are a few drivers that directly call __rounddown_pow_of_two(...)
> > - that other drivers and subsystems generate warnings
> >
> > So this alternate solution was devised which avoids touching 
> > existing code paths, and just updates drm_buddy, which seems to be 
> > the only driver that is failing. However, I am not sure if this is 
> > the proper way to go, so I would like to get a second opinion on 
> > this from those who know.
> >
> > /include/linux/log2.h
> > /drivers/gpu/drm/drm_buddy.c
> >
> > Signed-off-by: Luís Mendes 
> >> 8--8<
> > diff -uprN linux-next/drivers/gpu/drm/drm_buddy.c
> > linux-nextLM/drivers/gpu/drm/drm_buddy.c
> > --- linux-next/drivers/gpu/drm/drm_buddy.c2022-12-25
> > 16:29:26.0 +
> > +++ linux-nextLM/drivers/gpu/drm/drm_buddy.c2022-12-25
> > 17:04:32.136007116 +
> > @@ -128,7 +128,7 @@ int drm_buddy_init(struct drm_buddy *mm,
> >   unsigned int order;
> >   u64 root_size;
> >
> > -root_size = rounddown_pow_of_two(size);
> > +root_size = rounddown_pow_of_two_u64(size);
> >   order = ilog2(root_size) - ilog2(chunk_size);
>
> I think this can be handled much more easily if we keep around the root_order 
> instead of the root_size in the first place.
>
> Cause ilog2() does the right thing even for non power of two values 
> and so we just need the order for the offset subtraction below.
>
> Arun can you take a closer look at this?
>
> Regards,
> Christian.
>
> >
> >   root = drm_block_alloc(mm, NULL, order, offset); diff 
> > -uprN linux-next/include/linux/log2.h linux-nextLM/include/linux/log2.h
> > --- linux-next/include/linux/log2.h2022-12-25 16:29:29.0 +
> > +++ linux-nextLM/include/linux/log2.h2022-12-25 17:00:34.319901492 +
> > @@ -58,6 +58,18 @@ unsigned long __roundup_pow_of_two(unsig
> >   }
> >
> >   /**
> > + * __roundup_pow_of_two_u64() - round up to nearest power of two
> > + * (unsigned 64-bit precision version)
> > + * @n: value to round up
> > + */
> > +static inline __attribute__((const))
> > +u64 __roundup_pow_of_two_u64(u64 n)
> > +{
> > +	return 1ULL << fls64(n - 1);
> > +}
> > +
> > +
> > +/**
> >* __rounddown_pow_of_two() - round down to nearest power of two
> >* @n: value to round down
> >*/
> > @@ -68,6 +80,17 @@ unsigned long __rounddown_pow_of_two(uns
> >   }
> >
> >   /**
> > + * __rounddown_pow_of_two_u64() - round down to nearest power of 
> 

Re: [PATCH] drm/amdgpu: try allowed domain when pin framebuffer failed

2022-12-07 Thread Paneer Selvam, Arunpravin
[AMD Official Use Only - General]

Hi Christian,

If this change is possible, I think it would improve performance by 
eliminating frequent BO evictions under memory pressure.

Thanks,
Arun.

Get Outlook for Android


Re: [PATCH] drm/amdgpu: try allowed domain when pin framebuffer failed

2022-12-07 Thread Paneer Selvam, Arunpravin
[AMD Official Use Only - General]

Jesse informed me that this patch solves the issue; he tested all the cases.

Thanks,
Arun

Get Outlook for Android<https://aka.ms/AAb9ysg>

From: Christian König 
Sent: Wednesday, December 7, 2022 8:53:25 PM
To: Alex Deucher ; Zhang, Jesse(Jie) 

Cc: Zhang, Yifan ; amd-gfx 
; Paneer Selvam, Arunpravin 
; amd-gfx@lists.freedesktop.org 
; Deucher, Alexander 
; Koenig, Christian 
Subject: Re: [PATCH] drm/amdgpu: try allowed domain when pin framebuffer failed

I would go a step further and just allow GTT domain on ASICs != CARRIZO
| STONEY.

I can't see a good reason we should still have any limitation here, VRAM
doesn't have any advantage any more as far as I know.

Christian.

Am 07.12.22 um 16:10 schrieb Alex Deucher:
> Does this patch fix the problem?
>
> Alex
>
> On Wed, Dec 7, 2022 at 2:27 AM Zhang, Jesse(Jie)  wrote:
>> [AMD Official Use Only - General]
>>
>>
>>  drm/amdgpu: try allowed domain when pin framebuffer failed.
>>
>>
>>
>>  [WHY]
>>
>>
>>
>>  in some scenarios, memory allocation often fails, such as during 
>> hotplug or when playing games.
>>
>>  so we can try the allowed domains if the preferred domain cannot 
>> allocate memory.
>>
>>
>>
>>  Signed-off-by: jie1zhan jesse.zh...@amd.com
>>
>>  Change-Id: I4b62e2ff072d02c515f901000a5789339d481273
>>
>>
>>
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c 
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
>>
>> index 1ae0c8723348..05fcaf7f9d92 100644
>>
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
>>
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
>>
>> @@ -39,6 +39,7 @@
>>
>> #include "amdgpu.h"
>>
>> #include "amdgpu_trace.h"
>>
>> #include "amdgpu_amdkfd.h"
>>
>> +#include "amdgpu_display.h"
>>
>>
>>
>> /**
>>
>>* DOC: amdgpu_object
>>
>> @@ -942,8 +943,14 @@ int amdgpu_bo_pin_restricted(struct amdgpu_bo *bo, u32 
>> domain,
>>
>>  bo->placements[i].lpfn = lpfn;
>>
>>  }
>>
>>
>>
>> +   retry:
>>
>>  r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
>>
>>  if (unlikely(r)) {
>>
>> +   //try allowed domain when pin failed. just a workaround.
>>
>> +   if (unlikely(r == -ENOMEM) && domain != bo->allowed_domains) 
>> {
>>
>> +   amdgpu_bo_placement_from_domain(bo, 
>> bo->allowed_domains);
>>
>> +   goto retry;
>>
>> +   }
>>
>>  dev_err(adev->dev, "%p pin failed\n", bo);
>>
>>  goto error;
>>
>>  }
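Christian's suggestion above amounts to widening the pin domain up front rather than silently retrying. A hedged sketch (the helper exists in amdgpu, though its current logic is more conservative than what Christian proposes here):

	/* Let the pin domain include GTT wherever scanout from GTT works,
	 * e.g. via amdgpu_display_supported_domains(), instead of
	 * retrying with bo->allowed_domains after a failure:
	 */
	u32 domains = amdgpu_display_supported_domains(adev, bo->flags);
	int r = amdgpu_bo_pin(bo, domains);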



RE: [PATCH] drm/ttm: fix bulk move handling v2

2022-06-14 Thread Paneer Selvam, Arunpravin
[AMD Official Use Only - General]

Hi Christian,

I verified the patch, I don’t see the crashes.

Reviewed-by: Arunpravin Paneer Selvam 

Thanks,
Arun
-Original Message-
From: Christian König  
Sent: Monday, June 13, 2022 1:38 PM
To: Paneer Selvam, Arunpravin ; 
m...@fireburn.co.uk; amd-gfx@lists.freedesktop.org; 
dri-de...@lists.freedesktop.org
Cc: Koenig, Christian ; Tuikov, Luben 

Subject: [PATCH] drm/ttm: fix bulk move handling v2

The resource must be on the LRU before ttm_lru_bulk_move_add() is called and we 
need to check if the BO is pinned or not before adding it.

Additional to that we missed taking the LRU spinlock in ttm_bo_unpin().

Signed-off-by: Christian König 
Acked-by: Luben Tuikov 
---
 drivers/gpu/drm/ttm/ttm_bo.c   | 22 -
 drivers/gpu/drm/ttm/ttm_resource.c | 52 +-
 include/drm/ttm/ttm_resource.h |  8 ++---
 3 files changed, 54 insertions(+), 28 deletions(-)

diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
index 296af2b89951..0e210df65c30 100644
--- a/drivers/gpu/drm/ttm/ttm_bo.c
+++ b/drivers/gpu/drm/ttm/ttm_bo.c
@@ -103,11 +103,11 @@ void ttm_bo_set_bulk_move(struct ttm_buffer_object *bo,
return;
 
	spin_lock(&bo->bdev->lru_lock);
-   if (bo->bulk_move && bo->resource)
-   ttm_lru_bulk_move_del(bo->bulk_move, bo->resource);
+   if (bo->resource)
+   ttm_resource_del_bulk_move(bo->resource, bo);
bo->bulk_move = bulk;
-   if (bo->bulk_move && bo->resource)
-   ttm_lru_bulk_move_add(bo->bulk_move, bo->resource);
+   if (bo->resource)
+   ttm_resource_add_bulk_move(bo->resource, bo);
	spin_unlock(&bo->bdev->lru_lock);
 }
 EXPORT_SYMBOL(ttm_bo_set_bulk_move);
@@ -683,8 +683,11 @@ void ttm_bo_pin(struct ttm_buffer_object *bo)
 {
	dma_resv_assert_held(bo->base.resv);
	WARN_ON_ONCE(!kref_read(&bo->kref));
-   if (!(bo->pin_count++) && bo->bulk_move && bo->resource)
-   ttm_lru_bulk_move_del(bo->bulk_move, bo->resource);
+   spin_lock(&bo->bdev->lru_lock);
+   if (bo->resource)
+   ttm_resource_del_bulk_move(bo->resource, bo);
+   ++bo->pin_count;
+   spin_unlock(&bo->bdev->lru_lock);
 }
 EXPORT_SYMBOL(ttm_bo_pin);
 
@@ -701,8 +704,11 @@ void ttm_bo_unpin(struct ttm_buffer_object *bo)
if (WARN_ON_ONCE(!bo->pin_count))
return;
 
-   if (!(--bo->pin_count) && bo->bulk_move && bo->resource)
-   ttm_lru_bulk_move_add(bo->bulk_move, bo->resource);
+   spin_lock(&bo->bdev->lru_lock);
+   --bo->pin_count;
+   if (bo->resource)
+   ttm_resource_add_bulk_move(bo->resource, bo);
+   spin_unlock(&bo->bdev->lru_lock);
 }
 EXPORT_SYMBOL(ttm_bo_unpin);
 
diff --git a/drivers/gpu/drm/ttm/ttm_resource.c 
b/drivers/gpu/drm/ttm/ttm_resource.c
index 65889b3caf50..20f9adcc3235 100644
--- a/drivers/gpu/drm/ttm/ttm_resource.c
+++ b/drivers/gpu/drm/ttm/ttm_resource.c
@@ -91,8 +91,8 @@ static void ttm_lru_bulk_move_pos_tail(struct ttm_lru_bulk_move_pos *pos,
 }
 
 /* Add the resource to a bulk_move cursor */
-void ttm_lru_bulk_move_add(struct ttm_lru_bulk_move *bulk,
-			   struct ttm_resource *res)
+static void ttm_lru_bulk_move_add(struct ttm_lru_bulk_move *bulk,
+				  struct ttm_resource *res)
 {
struct ttm_lru_bulk_move_pos *pos = ttm_lru_bulk_move_pos(bulk, res);
 
@@ -105,8 +105,8 @@ void ttm_lru_bulk_move_add(struct ttm_lru_bulk_move *bulk,
 }
 
 /* Remove the resource from a bulk_move range */
-void ttm_lru_bulk_move_del(struct ttm_lru_bulk_move *bulk,
-			   struct ttm_resource *res)
+static void ttm_lru_bulk_move_del(struct ttm_lru_bulk_move *bulk,
+				  struct ttm_resource *res)
 {
struct ttm_lru_bulk_move_pos *pos = ttm_lru_bulk_move_pos(bulk, res);
 
@@ -122,6 +122,22 @@ void ttm_lru_bulk_move_del(struct ttm_lru_bulk_move *bulk,
}
 }
 
+/* Add the resource to a bulk move if the BO is configured for it */
+void ttm_resource_add_bulk_move(struct ttm_resource *res,
+   struct ttm_buffer_object *bo)
+{
+   if (bo->bulk_move && !bo->pin_count)
+   ttm_lru_bulk_move_add(bo->bulk_move, res);
+}
+
+/* Remove the resource from a bulk move if the BO is configured for it */
+void ttm_resource_del_bulk_move(struct ttm_resource *res,
+   struct ttm_buffer_object *bo)
+{
+   if (bo->bulk_move && !bo->pin_count)
+   ttm_lru_bulk_move_del(bo->bulk_move, res);
+}
+
 /* Move a resource to the LRU or bulk tail */
 void ttm_resource_move_to_lru_tail(struct ttm_resource *res)
 {
@@ -169,15 +185,14 @@ void ttm_resource_init(

RE: [PATCH v12] drm/amdgpu: add drm buddy support to amdgpu

2022-05-28 Thread Paneer Selvam, Arunpravin
[Public]

Hi,

After investigating this issue for quite some time, I found that the freeze 
problem is not in the amdgpu part of the buddy allocator patch, as that patch 
doesn't cause any issues when applied separately on top of the stable base of 
drm-next. After digging more into this issue, the below patch seems to be the 
cause of this problem,

drm/ttm: rework bulk move handling v5
https://cgit.freedesktop.org/drm/drm/commit/?id=fee2ede155423b0f7a559050a39750b98fe9db69

When this patch is applied on top of the stable (working) version of drm-next 
without the buddy allocator patch, we can see multiple issues, listed below, 
each thrown randomly at every GravityMark run:
1. general protection fault at ttm_lru_bulk_move_tail()
2. NULL pointer dereference at ttm_lru_bulk_move_tail()
3. NULL pointer dereference at ttm_resource_init()

Regards,
Arun.
-Original Message-
From: Alex Deucher  
Sent: Monday, May 16, 2022 8:36 PM
To: Mike Lothian 
Cc: Paneer Selvam, Arunpravin ; Intel Graphics 
Development ; amd-gfx list 
; Maling list - DRI developers 
; Deucher, Alexander 
; Koenig, Christian ; 
Matthew Auld 
Subject: Re: [PATCH v12] drm/amdgpu: add drm buddy support to amdgpu

On Mon, May 16, 2022 at 8:40 AM Mike Lothian  wrote:
>
> Hi
>
> The merge window for 5.19 will probably be opening next week, has 
> there been any progress with this bug?

It took a while to find a combination of GPUs that would repro the issue, but 
now that we can, it is still being investigated.

Alex

>
> Thanks
>
> Mike
>
> On Mon, 2 May 2022 at 17:31, Mike Lothian  wrote:
> >
> > On Mon, 2 May 2022 at 16:54, Arunpravin Paneer Selvam 
> >  wrote:
> > >
> > >
> > >
> > > On 5/2/2022 8:41 PM, Mike Lothian wrote:
> > > > On Wed, 27 Apr 2022 at 12:55, Mike Lothian  wrote:
> > > >> On Tue, 26 Apr 2022 at 17:36, Christian König 
> > > >>  wrote:
> > > >>> Hi Mike,
> > > >>>
> > > >>> sounds like somehow stitching together the SG table for PRIME 
> > > >>> doesn't work any more with this patch.
> > > >>>
> > > >>> Can you try with P2P DMA disabled?
> > > >> -CONFIG_PCI_P2PDMA=y
> > > >> +# CONFIG_PCI_P2PDMA is not set
> > > >>
> > > >> If that's what you're meaning, then there's no difference, I'll 
> > > >> upload my dmesg to the gitlab issue
> > > >>
> > > >>> Apart from that can you take a look Arun?
> > > >>>
> > > >>> Thanks,
> > > >>> Christian.
> > > > Hi
> > > >
> > > > Have you had any success in replicating this?
> > > Hi Mike,
> > > I couldn't replicate on my Raven APU machine. I see you have 2 
> > > cards initialized, one is Renoir and the other is Navy Flounder. 
> > > Could you give some more details? Are you running GravityMark on 
> > > Renoir, and what is your system RAM configuration?
> > > >
> > > > Cheers
> > > >
> > > > Mike
> > >
> > Hi
> >
> > It's a PRIME laptop, it failed on the RENOIR too, it caused a 
> > lockup, but systemd managed to capture it, I'll attach it to the 
> > issue
> >
> > I've got 64GB RAM, the 6800M has 12GB VRAM
> >
> > Cheers
> >
> > Mike


RE: [PATCH v12] drm/amdgpu: add drm buddy support to amdgpu

2022-04-26 Thread Paneer Selvam, Arunpravin
[AMD Official Use Only - General]

Hi Christian,

I will check this issue.

Regards,
Arun
-Original Message-
From: Koenig, Christian  
Sent: Tuesday, April 26, 2022 10:06 PM
To: Mike Lothian ; Paneer Selvam, Arunpravin 

Cc: intel-...@lists.freedesktop.org; dri-de...@lists.freedesktop.org; 
amd-gfx@lists.freedesktop.org; Deucher, Alexander ; 
matthew.a...@intel.com
Subject: Re: [PATCH v12] drm/amdgpu: add drm buddy support to amdgpu

Hi Mike,

sounds like somehow stitching together the SG table for PRIME doesn't work any 
more with this patch.

Can you try with P2P DMA disabled?

Apart from that can you take a look Arun?

Thanks,
Christian.

Am 26.04.22 um 17:29 schrieb Mike Lothian:
> Hi
>
> I'm having issues with this patch on my PRIME system and vulkan 
> workloads
>
> https://gitlab.freedesktop.org/drm/amd/-/issues/1992
>
> Is there any chance you could take a look?
>
> Cheers
>
> Mike


RE: [drm/selftests] 39ec47bbfd: kernel_BUG_at_drivers/gpu/drm/drm_buddy.c

2022-02-28 Thread Paneer Selvam, Arunpravin
[AMD Official Use Only]

Hi Christian,
I will check

Thanks,
Arun
-Original Message-
From: Koenig, Christian  
Sent: Monday, February 28, 2022 4:29 PM
To: kernel test robot ; Paneer Selvam, Arunpravin 

Cc: 0day robot ; Matthew Auld ; LKML 
; l...@lists.01.org; 
dri-de...@lists.freedesktop.org; intel-...@lists.freedesktop.org; 
amd-gfx@lists.freedesktop.org; tzimmerm...@suse.de; Deucher, Alexander 

Subject: Re: [drm/selftests] 39ec47bbfd: 
kernel_BUG_at_drivers/gpu/drm/drm_buddy.c

Arun can you take a look at that one here?

It looks like a real problem to me and not just a potential false negative like 
the other issue.

Thanks,
Christian.

Am 27.02.22 um 16:18 schrieb kernel test robot:
>
> Greeting,
>
> FYI, we noticed the following commit (built with gcc-9):
>
> commit: 39ec47bbfd5dd3cea0b711ee9f1acdca37399c86 ("[PATCH v2 2/7] 
> drm/selftests: add drm buddy alloc limit testcase")
> url: 
> https://github.com/0day-ci/linux/commits/Arunpravin/drm-selftests-Move-i915-buddy-selftests-into-drm/20220223-015043
> patch link: 
> https://lore.kernel.org/dri-devel/20220222174845.2175-2-Arunpravin.PaneerSelvam@amd.com
>
> in testcase: boot
>
> on test machine: qemu-system-x86_64 -enable-kvm -cpu Icelake-Server 
> -smp 4 -m 16G
>
> caused below changes (please refer to attached dmesg/kmsg for entire 
> log/backtrace):
>
>
> +---+++
> |   | be9e8c6c00 | 
> | 39ec47bbfd |
> +---+++
> | boot_successes| 14 | 0  
> |
> | boot_failures | 0  | 16 
> |
> | UBSAN:shift-out-of-bounds_in_include/linux/log2.h | 0  | 16 
> |
> | kernel_BUG_at_drivers/gpu/drm/drm_buddy.c | 0  | 16 
> |
> | invalid_opcode:#[##]  | 0  | 16 
> |
> | EIP:drm_buddy_init| 0  | 16 
> |
> | Kernel_panic-not_syncing:Fatal_exception  | 0  | 16 
> |
> +---+++
>
>
> If you fix the issue, kindly add following tag
> Reported-by: kernel test robot 
>
>
> [   68.124177][T1] UBSAN: shift-out-of-bounds in 
> include/linux/log2.h:67:13
> [   68.125333][T1] shift exponent 4294967295 is too large for 32-bit type 
> 'long unsigned int'
> [   68.126563][T1] CPU: 0 PID: 1 Comm: swapper Not tainted 
> 5.17.0-rc2-00311-g39ec47bbfd5d #2
> [   68.127758][T1] Call Trace:
> [ 68.128187][ T1] dump_stack_lvl (lib/dump_stack.c:108) [ 68.128793][ 
> T1] dump_stack (lib/dump_stack.c:114) [ 68.129331][ T1] ubsan_epilogue 
> (lib/ubsan.c:152) [ 68.129958][ T1] 
> __ubsan_handle_shift_out_of_bounds.cold 
> (arch/x86/include/asm/smap.h:85) [ 68.130791][ T1] ? 
> drm_block_alloc+0x28/0x80 [ 68.131582][ T1] ? rcu_read_lock_sched_held 
> (kernel/rcu/update.c:125) [ 68.132215][ T1] ? kmem_cache_alloc 
> (include/trace/events/kmem.h:54 mm/slab.c:3501) [ 68.132878][ T1] ? 
> mark_free+0x2e/0x80 [ 68.133524][ T1] drm_buddy_init.cold 
> (include/linux/log2.h:67 drivers/gpu/drm/drm_buddy.c:131) [ 
> 68.134145][ T1] ? test_drm_cmdline_init 
> (drivers/gpu/drm/selftests/test-drm_buddy.c:87)
> [ 68.134770][ T1] igt_buddy_alloc_limit 
> (drivers/gpu/drm/selftests/test-drm_buddy.c:30)
> [ 68.135472][ T1] ? vprintk_default (kernel/printk/printk.c:2257) [ 
> 68.136057][ T1] ? test_drm_cmdline_init 
> (drivers/gpu/drm/selftests/test-drm_buddy.c:87)
> [ 68.136812][ T1] test_drm_buddy_init 
> (drivers/gpu/drm/selftests/drm_selftest.c:77 
> drivers/gpu/drm/selftests/test-drm_buddy.c:95)
> [ 68.137475][ T1] do_one_initcall (init/main.c:1300) [ 68.138111][ T1] 
> ? parse_args (kernel/params.c:609 kernel/params.c:146 
> kernel/params.c:1
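One failure mode visible in the log is an order computed from unvalidated sizes (the shift exponent of 4294967295 is an underflowed order). The fix direction is input validation at the top of drm_buddy_init(); a sketch close to what upstream does today:

int drm_buddy_init(struct drm_buddy *mm, u64 size, u64 chunk_size)
{
	if (size < chunk_size)
		return -EINVAL;

	if (chunk_size < PAGE_SIZE)
		return -EINVAL;

	if (!is_power_of_2(chunk_size))
		return -EINVAL;

	size = round_down(size, chunk_size);
	/* ... proceed to carve the space into power-of-two roots ... */
}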

RE: [PATCH v9 1/6] drm: move the buddy allocator from i915 into common drm

2022-01-20 Thread Paneer Selvam, Arunpravin
[AMD Official Use Only]

Hi Matthew,

Do you have suggestions or concerns on the other patches? Shall we push all the 
other patches into drm-misc-next?

Thanks,
Arun.

I've just gone ahead and pushed this version here to drm-misc-next.

That should at least reduce the amount of mails send back and forth.

Let me know when there are more rbs on the rest and I will push that as 
well.

Thanks,
Christian.

-Original Message-
From: amd-gfx  On Behalf Of Christian 
König
Sent: Wednesday, January 19, 2022 12:54 PM
To: Paneer Selvam, Arunpravin ; 
dri-de...@lists.freedesktop.org; intel-...@lists.freedesktop.org; 
amd-gfx@lists.freedesktop.org
Cc: dan...@ffwll.ch; jani.nik...@linux.intel.com; matthew.a...@intel.com; 
tzimmerm...@suse.de; Deucher, Alexander ; Koenig, 
Christian 
Subject: Re: [PATCH v9 1/6] drm: move the buddy allocator from i915 into common 
drm

Am 18.01.22 um 11:44 schrieb Arunpravin:
> Move the base i915 buddy allocator code into drm
> - Move i915_buddy.h to include/drm
> - Move i915_buddy.c to drm root folder
> - Rename "i915" string with "drm" string wherever applicable
> - Rename "I915" string with "DRM" string wherever applicable
> - Fix header file dependencies
> - Fix alignment issues
> - add Makefile support for drm buddy
> - export functions and write kerneldoc description
> - Remove i915 selftest config check condition as buddy selftest
>will be moved to drm selftest folder
>
> cleanup i915 buddy references in i915 driver module
> and replace with drm buddy
>
> v2:
>- include header file in alphabetical order(Thomas)
>- merged changes listed in the body section into a single patch
>  to keep the build intact(Christian, Jani)
>
> v3:
>- make drm buddy a separate module(Thomas, Christian)
>
> v4:
>- Fix build error reported by kernel test robot 
>- removed i915 buddy selftest from i915_mock_selftests.h to
>  avoid build error
>- removed selftests/i915_buddy.c file as we create a new set of
>  buddy test cases in drm/selftests folder
>
> v5:
>- Fix merge conflict issue
>
> v6:
>- replace drm_buddy_mm structure name as drm_buddy(Thomas, Christian)
>- replace drm_buddy_alloc() function name as drm_buddy_alloc_blocks()
>  (Thomas)
>- replace drm_buddy_free() function name as drm_buddy_free_block()
>  (Thomas)
>- export drm_buddy_free_block() function
>- fix multiple instances of KMEM_CACHE() entry
>
> v7:
>- fix warnings reported by kernel test robot 
>- modify the license(Christian)
>
> v8:
>- fix warnings reported by kernel test robot 
>
> Signed-off-by: Arunpravin 

I've just gone ahead and pushed this version here to drm-misc-next.

That should at least reduce the amount of mails send back and forth.

Let me know when there are more rbs on the rest and I will push that as 
well.

Thanks,
Christian.

> ---
>   drivers/gpu/drm/Kconfig   |   6 +
>   drivers/gpu/drm/Makefile  |   2 +
>   drivers/gpu/drm/drm_buddy.c   | 535 
>   drivers/gpu/drm/i915/Kconfig  |   1 +
>   drivers/gpu/drm/i915/Makefile |   1 -
>   drivers/gpu/drm/i915/i915_buddy.c | 466 ---
>   drivers/gpu/drm/i915/i915_buddy.h | 143 
>   drivers/gpu/drm/i915/i915_module.c|   3 -
>   drivers/gpu/drm/i915/i915_scatterlist.c   |  11 +-
>   drivers/gpu/drm/i915/i915_ttm_buddy_manager.c |  33 +-
>   drivers/gpu/drm/i915/i915_ttm_buddy_manager.h |   4 +-
>   drivers/gpu/drm/i915/selftests/i915_buddy.c   | 787 --
>   .../drm/i915/selftests/i915_mock_selftests.h  |   1 -
>   .../drm/i915/selftests/intel_memory_region.c  |  13 +-
>   include/drm/drm_buddy.h   | 150 
>   15 files changed, 725 insertions(+), 1431 deletions(-)
>   create mode 100644 drivers/gpu/drm/drm_buddy.c
>   delete mode 100644 drivers/gpu/drm/i915/i915_buddy.c
>   delete mode 100644 drivers/gpu/drm/i915/i915_buddy.h
>   delete mode 100644 drivers/gpu/drm/i915/selftests/i915_buddy.c
>   create mode 100644 include/drm/drm_buddy.h
>
> diff --git a/drivers/gpu/drm/Kconfig b/drivers/gpu/drm/Kconfig
> index 91f54aeb0b7c..cc3e979c9c9d 100644
> --- a/drivers/gpu/drm/Kconfig
> +++ b/drivers/gpu/drm/Kconfig
> @@ -204,6 +204,12 @@ config DRM_TTM
> GPU memory types. Will be enabled automatically if a device driver
> uses it.
>   
> +config DRM_BUDDY
> + tristate
> + depends on DRM
> + help
> +   A page based buddy allocator
> +
>   config DRM_VRAM_HELPER
>   tristate
>   depends on DRM
> diff --git a/drivers/gpu/drm/Makefile b/drivers/gpu/dr

RE: [PATCH v4 4/6] drm: implement a method to free unused pages

2021-12-09 Thread Paneer Selvam, Arunpravin
[Public]

Hi Matthew,

Ping?

Regards,
Arun
-Original Message-
From: Paneer Selvam, Arunpravin  
Sent: Wednesday, December 1, 2021 10:10 PM
To: dri-de...@lists.freedesktop.org; intel-...@lists.freedesktop.org; 
amd-gfx@lists.freedesktop.org
Cc: matthew.a...@intel.com; dan...@ffwll.ch; Koenig, Christian 
; Deucher, Alexander ; 
tzimmerm...@suse.de; jani.nik...@linux.intel.com; Paneer Selvam, Arunpravin 

Subject: [PATCH v4 4/6] drm: implement a method to free unused pages

On contiguous allocation, we round up the size to the *next* power of 2; 
implement a function to free the unused pages after the newly allocated block.

v2(Matthew Auld):
  - replace function name 'drm_buddy_free_unused_pages' with
drm_buddy_block_trim
  - replace input argument name 'actual_size' with 'new_size'
  - add more validation checks for input arguments
  - add overlaps check to avoid needless searching and splitting
  - merged the below patch to see the feature in action
- add free unused pages support to i915 driver
  - lock the drm_buddy_block_trim() function, as the mark_free()/mark_split()
    it calls are all globally visible

v3:
  - remove drm_buddy_block_trim() error handling and
print a warn message if it fails

Signed-off-by: Arunpravin 
---
 drivers/gpu/drm/drm_buddy.c   | 72 ++-
 drivers/gpu/drm/i915/i915_ttm_buddy_manager.c | 10 +++
 include/drm/drm_buddy.h   |  4 ++
 3 files changed, 83 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/drm_buddy.c b/drivers/gpu/drm/drm_buddy.c
index eddc1eeda02e..707efc82216d 100644
--- a/drivers/gpu/drm/drm_buddy.c
+++ b/drivers/gpu/drm/drm_buddy.c
@@ -434,7 +434,8 @@ alloc_from_freelist(struct drm_buddy_mm *mm,
 static int __alloc_range(struct drm_buddy_mm *mm,
			 struct list_head *dfs,
			 u64 start, u64 size,
-			 struct list_head *blocks)
+			 struct list_head *blocks,
+			 bool trim_path)
 {
struct drm_buddy_block *block;
struct drm_buddy_block *buddy;
@@ -480,8 +481,20 @@ static int __alloc_range(struct drm_buddy_mm *mm,
 
 		if (!drm_buddy_block_is_split(block)) {
 			err = split_block(mm, block);
-			if (unlikely(err))
+			if (unlikely(err)) {
+				if (trim_path)
+					/*
+					 * In the trim case we return and do not
+					 * take the split failure path, as that
+					 * removes the block from the original
+					 * list and potentially also frees it.
+					 * Leave it as is; worst case we get
+					 * some internal fragmentation and the
+					 * decision is left to the user.
+					 */
+					return err;
+
 				goto err_undo;
+			}
 		}
 
 		list_add(&block->right->tmp_link, dfs);
@@ -535,8 +548,61 @@ static int __drm_buddy_alloc_range(struct drm_buddy_mm *mm,
 	for (i = 0; i < mm->n_roots; ++i)
 		list_add_tail(&mm->roots[i]->tmp_link, &dfs);
 
-	return __alloc_range(mm, &dfs, start, size, blocks);
+	return __alloc_range(mm, &dfs, start, size, blocks, 0);
 }
+
+/**
+ * drm_buddy_block_trim - free unused pages
+ *
+ * @mm: DRM buddy manager
+ * @new_size: original size requested
+ * @blocks: output list head to add allocated blocks
+ *
+ * For contiguous allocation, we round up the size to the nearest
+ * power-of-two value; drivers consume the *actual* size, so the
+ * remaining portion is unused and can be freed.
+ *
+ * Returns:
+ * 0 on success, error code on failure.
+ */
+int drm_buddy_block_trim(struct drm_buddy_mm *mm,
+u64 new_size,
+struct list_head *blocks)
+{
+   struct drm_buddy_block *block;
+   u64 new_start;
+   LIST_HEAD(dfs);
+
+   if (!list_is_singular(blocks))
+   return -EINVAL;
+
+   block = list_first_entry(blocks,
+struct drm_buddy_block,
+link);
+
+   if (!drm_buddy_block_is_allocated(block))
+   return -EINVAL;
+
+   if (new_size > drm_buddy_block_size(mm, block))
+   return -EINVAL;
+
+   if (!new_size && !IS_ALIGNED(new_size, mm->chunk_size))
+   return -EINVAL;
+
+   if (new_size == drm_buddy_block_size(mm, block))
+   return 0;
+
+   list_del(&block->link);
+
+   new_start = drm_buddy_block_offset(block);
+
+   mark_free(mm, block);
+
+   list_add(&block->tmp_link, &dfs);
+
+   return __alloc_range(mm, &dfs, new_start, new_size, blocks, 1);
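
A minimal usage sketch of the trim path (assuming the v4 names in this
patch; the allocation step is elided and the sizes are illustrative):

	u64 needed = 768ULL << PAGE_SHIFT;
	LIST_HEAD(blocks);
	int err;

	/* a contiguous request was rounded up to a 1024-page block on &blocks */

	/* give the unused 256-page tail back to the allocator */
	err = drm_buddy_block_trim(mm, needed, &blocks);
	if (unlikely(err))
		pr_warn("drm_buddy: block trim failed, keeping the rounded-up block\n");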

RE: [PATCH v4 2/6] drm: improve drm_buddy_alloc function

2021-12-09 Thread Paneer Selvam, Arunpravin
[AMD Official Use Only]

Hi Matthew,

Ping on this?

Regards,
Arun
-Original Message-
From: amd-gfx  On Behalf Of Arunpravin
Sent: Wednesday, December 1, 2021 10:10 PM
To: dri-de...@lists.freedesktop.org; intel-...@lists.freedesktop.org; 
amd-gfx@lists.freedesktop.org
Cc: dan...@ffwll.ch; Paneer Selvam, Arunpravin 
; jani.nik...@linux.intel.com; 
matthew.a...@intel.com; tzimmerm...@suse.de; Deucher, Alexander 
; Koenig, Christian 
Subject: [PATCH v4 2/6] drm: improve drm_buddy_alloc function

- Make drm_buddy_alloc a single function to handle
  range allocation and non-range allocation demands

- Implemented a new function alloc_range() which allocates
  the requested power-of-two block, complying with range limitations

- Moved order computation and memory alignment logic from
  i915 driver to drm buddy (see the sketch after this changelog)

v2:
  merged below changes to keep the build unbroken
   - drm_buddy_alloc_range() becomes obsolete and may be removed
   - enable ttm range allocation (fpfn / lpfn) support in i915 driver
   - apply enhanced drm_buddy_alloc() function to i915 driver

v3(Matthew Auld):
  - Fix alignment issues and remove unnecessary list_empty check
  - add more validation checks for input arguments
  - make alloc_range() block allocations as bottom-up
  - optimize order computation logic
  - replace uint64_t with u64, which is preferred in the kernel

v4(Matthew Auld):
  - keep drm_buddy_alloc_range() function implementation for generic
actual range allocations
  - keep alloc_range() implementation for end bias allocations
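
A rough sketch of the order/alignment computation moved out of i915
(assuming power-of-two alignment and chunk sizes; the helper name is
illustrative, not part of the patch):

	/* translate a byte size plus minimum alignment into a buddy order */
	static unsigned int example_compute_order(u64 size, u64 min_page_size,
						  u64 chunk_size)
	{
		unsigned int min_order = ilog2(min_page_size) - ilog2(chunk_size);
		unsigned int order = fls64(size >> ilog2(chunk_size)) - 1;

		/* largest 2^n that fits, but never below the requested alignment */
		return max(order, min_order);
	}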

Signed-off-by: Arunpravin 
---
 drivers/gpu/drm/drm_buddy.c   | 316 +-
 drivers/gpu/drm/i915/i915_ttm_buddy_manager.c |  67 ++--
 drivers/gpu/drm/i915/i915_ttm_buddy_manager.h |   2 +
 include/drm/drm_buddy.h   |  22 +-
 4 files changed, 285 insertions(+), 122 deletions(-)

diff --git a/drivers/gpu/drm/drm_buddy.c b/drivers/gpu/drm/drm_buddy.c
index 9340a4b61c5a..7f47632821f4 100644
--- a/drivers/gpu/drm/drm_buddy.c
+++ b/drivers/gpu/drm/drm_buddy.c
@@ -280,23 +280,97 @@ void drm_buddy_free_list(struct drm_buddy_mm *mm, struct list_head *objects)
 }
 EXPORT_SYMBOL(drm_buddy_free_list);
 
-/**
- * drm_buddy_alloc - allocate power-of-two blocks
- *
- * @mm: DRM buddy manager to allocate from
- * @order: size of the allocation
- *
- * The order value here translates to:
- *
- * 0 = 2^0 * mm->chunk_size
- * 1 = 2^1 * mm->chunk_size
- * 2 = 2^2 * mm->chunk_size
- *
- * Returns:
- * allocated ptr to the &drm_buddy_block on success
- */
-struct drm_buddy_block *
-drm_buddy_alloc(struct drm_buddy_mm *mm, unsigned int order)
+static inline bool overlaps(u64 s1, u64 e1, u64 s2, u64 e2)
+{
+	return s1 <= e2 && e1 >= s2;
+}
+
+static inline bool contains(u64 s1, u64 e1, u64 s2, u64 e2)
+{
+	return s1 <= s2 && e1 >= e2;
+}
+
+static struct drm_buddy_block *
+alloc_range_bias(struct drm_buddy_mm *mm,
+u64 start, u64 end,
+unsigned int order)
+{
+   struct drm_buddy_block *block;
+   struct drm_buddy_block *buddy;
+   LIST_HEAD(dfs);
+   int err;
+   int i;
+
+   end = end - 1;
+
+   for (i = 0; i < mm->n_roots; ++i)
+   list_add_tail(&mm->roots[i]->tmp_link, &dfs);
+
+   do {
+   u64 block_start;
+   u64 block_end;
+
+   block = list_first_entry_or_null(&dfs,
+struct drm_buddy_block,
+tmp_link);
+   if (!block)
+   break;
+
+   list_del(&block->tmp_link);
+
+   if (drm_buddy_block_order(block) < order)
+   continue;
+
+   block_start = drm_buddy_block_offset(block);
+   block_end = block_start + drm_buddy_block_size(mm, block) - 1;
+
+   if (!overlaps(start, end, block_start, block_end))
+   continue;
+
+   if (drm_buddy_block_is_allocated(block))
+   continue;
+
+   if (contains(start, end, block_start, block_end) &&
+   order == drm_buddy_block_order(block)) {
+   /*
+* Find the free block within the range.
+*/
+   if (drm_buddy_block_is_free(block))
+   return block;
+
+   continue;
+   }
+
+   if (!drm_buddy_block_is_split(block)) {
+   err = split_block(mm, block);
+   if (unlikely(err))
+   goto err_undo;
+   }
+
+   list_add(&block->right->tmp_link, &dfs);
+   list_add(&block->left->tmp_link, &dfs);
+   } while (1);
+
+   return ERR_PTR(-ENOSPC);
+
+err_undo:
+   /*
+* We really don't want to leave around a bunch of split blocks, sinc

RE: [PATCH 1/2] Enable buddy memory manager support

2021-09-22 Thread Paneer Selvam, Arunpravin
[AMD Public Use]

Hi Christian,

> And where is the patch to switch i915 and remove the Intel copy of this?
Creating a patch for the switch.

> In general I think that every public function here needs a kerneldoc 
> description what it is all about.
Making a kernel doc description for each public function

Thanks,
Arun
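
For illustration, the kind of kerneldoc being asked for might look like
this for the init function (a sketch of the wording, matching the
function as posted):

/**
 * drm_buddy_init - init memory manager
 *
 * @mm: DRM buddy manager to initialize
 * @size: size in bytes to manage
 * @chunk_size: minimum page size in bytes for our allocations
 *
 * Initializes the memory manager and its resources.
 *
 * Returns:
 * 0 on success, error code on failure.
 */
int drm_buddy_init(struct drm_buddy_mm *mm, u64 size, u64 chunk_size);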
-Original Message-
From: Koenig, Christian  
Sent: Wednesday, September 22, 2021 12:28 PM
To: Paneer Selvam, Arunpravin ; 
dri-de...@lists.freedesktop.org; intel-...@lists.freedesktop.org; 
amd-gfx@lists.freedesktop.org; matthew.a...@intel.com; dan...@ffwll.ch; 
Deucher, Alexander 
Subject: Re: [PATCH 1/2] Enable buddy memory manager support

Am 20.09.21 um 21:20 schrieb Arunpravin:
> Port Intel buddy system manager to drm root folder.
> Add CPU mappable/non-mappable region support to the drm buddy manager.

And where is the patch to switch i915 and remove the Intel copy of this?

In general I think that every public function here needs a kerneldoc 
description what it is all about.

Regards,
Christian.

>
> Signed-off-by: Arunpravin 
> ---
>   drivers/gpu/drm/Makefile|   2 +-
>   drivers/gpu/drm/drm_buddy.c | 465 
>   include/drm/drm_buddy.h | 154 
>   3 files changed, 620 insertions(+), 1 deletion(-)
>   create mode 100644 drivers/gpu/drm/drm_buddy.c
>   create mode 100644 include/drm/drm_buddy.h
>
> diff --git a/drivers/gpu/drm/Makefile b/drivers/gpu/drm/Makefile index 
> a118692a6df7..fe1a2fc09675 100644
> --- a/drivers/gpu/drm/Makefile
> +++ b/drivers/gpu/drm/Makefile
> @@ -18,7 +18,7 @@ drm-y   :=  drm_aperture.o drm_auth.o drm_cache.o \
>   drm_dumb_buffers.o drm_mode_config.o drm_vblank.o \
>   drm_syncobj.o drm_lease.o drm_writeback.o drm_client.o \
>   drm_client_modeset.o drm_atomic_uapi.o drm_hdcp.o \
> - drm_managed.o drm_vblank_work.o
> + drm_managed.o drm_vblank_work.o drm_buddy.o
>   
>   drm-$(CONFIG_DRM_LEGACY) += drm_agpsupport.o drm_bufs.o drm_context.o 
> drm_dma.o \
>   drm_legacy_misc.o drm_lock.o drm_memory.o 
> drm_scatter.o \ 
> diff --git a/drivers/gpu/drm/drm_buddy.c b/drivers/gpu/drm/drm_buddy.c
> new file mode 100644
> index 000000000000..f07919a004b6
> --- /dev/null
> +++ b/drivers/gpu/drm/drm_buddy.c
> @@ -0,0 +1,465 @@
> +// SPDX-License-Identifier: MIT
> +/*
> + * Copyright © 2021 Intel Corporation
> + */
> +
> +#include <linux/kmemleak.h>
> +#include <drm/drm_buddy.h>
> +
> +static struct drm_buddy_block *drm_block_alloc(struct drm_buddy_mm *mm,
> + struct drm_buddy_block *parent, unsigned int order,
> + u64 offset)
> +{
> + struct drm_buddy_block *block;
> +
> + BUG_ON(order > DRM_BUDDY_MAX_ORDER);
> +
> + block = kmem_cache_zalloc(mm->slab_blocks, GFP_KERNEL);
> + if (!block)
> + return NULL;
> +
> + block->header = offset;
> + block->header |= order;
> + block->parent = parent;
> + block->start = offset >> PAGE_SHIFT;
> + block->size = (mm->chunk_size << order) >> PAGE_SHIFT;
> +
> + BUG_ON(block->header & DRM_BUDDY_HEADER_UNUSED);
> + return block;
> +}
> +
> +static void drm_block_free(struct drm_buddy_mm *mm, struct drm_buddy_block *block)
> +{
> +	kmem_cache_free(mm->slab_blocks, block);
> +}
> +
> +static void add_ordered(struct drm_buddy_mm *mm, struct drm_buddy_block *block)
> +{
> + struct drm_buddy_block *node;
> +
> +	if (list_empty(&mm->free_list[drm_buddy_block_order(block)])) {
> +		list_add(&block->link,
> +			 &mm->free_list[drm_buddy_block_order(block)]);
> +		return;
> +	}
> +
> +	list_for_each_entry(node, &mm->free_list[drm_buddy_block_order(block)], link)
> +		if (block->start > node->start)
> +			break;
> +
> +	__list_add(&block->link, node->link.prev, &node->link);
> +}
> +
> +static void mark_allocated(struct drm_buddy_block *block)
> +{
> +	block->header &= ~DRM_BUDDY_HEADER_STATE;
> +	block->header |= DRM_BUDDY_ALLOCATED;
> +
> +	list_del(&block->link);
> +}
> +
> +static void mark_free(struct drm_buddy_mm *mm,
> +		      struct drm_buddy_block *block)
> +{
> + block->header &= ~DRM_BUDDY_HEADER_STATE;
> + block->header |= DRM_BUDDY_FREE;
> +
> + add_ordered(mm, block);
> +}
> +
> +static void mark_split(struct drm_buddy_block *block)
> +{
> +	block->header &= ~DRM_BUDDY_HEADER_STATE;
> +	block->header |= DRM_BUDDY_SPLIT;
> +
> +	list_del(&block->link);
> +}
> +
> +int drm_buddy_init(struct drm

RE: [PATCH 1/2] Enable buddy memory manager support

2021-09-22 Thread Paneer Selvam, Arunpravin
[AMD Public Use]

Hi Alex,
I will fix the name and send a document in my next version.

Thanks,
Arun
-Original Message-
From: Alex Deucher  
Sent: Tuesday, September 21, 2021 12:54 AM
To: Paneer Selvam, Arunpravin 
Cc: Maling list - DRI developers ; Intel 
Graphics Development ; amd-gfx list 
; Koenig, Christian ; 
Matthew Auld ; Daniel Vetter ; 
Deucher, Alexander 
Subject: Re: [PATCH 1/2] Enable buddy memory manager support

On Mon, Sep 20, 2021 at 3:21 PM Arunpravin  
wrote:

Please prefix the patch subject with drm.  E.g.,
drm: Enable buddy memory manager support

Same for the second patch, but make it drm/amdgpu instead.

Alex

>
> Port Intel buddy system manager to drm root folder.
> Add CPU mappable/non-mappable region support to the drm buddy manager.
>
> Signed-off-by: Arunpravin 
> ---
>  drivers/gpu/drm/Makefile|   2 +-
>  drivers/gpu/drm/drm_buddy.c | 465 
>  include/drm/drm_buddy.h | 154 
>  3 files changed, 620 insertions(+), 1 deletion(-)  create mode 100644 
> drivers/gpu/drm/drm_buddy.c  create mode 100644 
> include/drm/drm_buddy.h
>
> diff --git a/drivers/gpu/drm/Makefile b/drivers/gpu/drm/Makefile index 
> a118692a6df7..fe1a2fc09675 100644
> --- a/drivers/gpu/drm/Makefile
> +++ b/drivers/gpu/drm/Makefile
> @@ -18,7 +18,7 @@ drm-y   :=drm_aperture.o drm_auth.o drm_cache.o 
> \
> drm_dumb_buffers.o drm_mode_config.o drm_vblank.o \
> drm_syncobj.o drm_lease.o drm_writeback.o drm_client.o \
> drm_client_modeset.o drm_atomic_uapi.o drm_hdcp.o \
> -   drm_managed.o drm_vblank_work.o
> +   drm_managed.o drm_vblank_work.o drm_buddy.o
>
>  drm-$(CONFIG_DRM_LEGACY) += drm_agpsupport.o drm_bufs.o drm_context.o 
> drm_dma.o \
> drm_legacy_misc.o drm_lock.o drm_memory.o 
> 		drm_scatter.o \
> diff --git a/drivers/gpu/drm/drm_buddy.c b/drivers/gpu/drm/drm_buddy.c
> new file mode 100644
> index 000000000000..f07919a004b6
> --- /dev/null
> +++ b/drivers/gpu/drm/drm_buddy.c
> @@ -0,0 +1,465 @@
> +// SPDX-License-Identifier: MIT
> +/*
> + * Copyright © 2021 Intel Corporation
> + */
> +
> +#include <linux/kmemleak.h>
> +#include <drm/drm_buddy.h>
> +
> +static struct drm_buddy_block *drm_block_alloc(struct drm_buddy_mm *mm,
> +   struct drm_buddy_block *parent, unsigned int order,
> +   u64 offset)
> +{
> +   struct drm_buddy_block *block;
> +
> +   BUG_ON(order > DRM_BUDDY_MAX_ORDER);
> +
> +   block = kmem_cache_zalloc(mm->slab_blocks, GFP_KERNEL);
> +   if (!block)
> +   return NULL;
> +
> +   block->header = offset;
> +   block->header |= order;
> +   block->parent = parent;
> +   block->start = offset >> PAGE_SHIFT;
> +   block->size = (mm->chunk_size << order) >> PAGE_SHIFT;
> +
> +   BUG_ON(block->header & DRM_BUDDY_HEADER_UNUSED);
> +   return block;
> +}
> +
> +static void drm_block_free(struct drm_buddy_mm *mm, struct drm_buddy_block *block)
> +{
> +	kmem_cache_free(mm->slab_blocks, block);
> +}
> +
> +static void add_ordered(struct drm_buddy_mm *mm, struct drm_buddy_block *block)
> +{
> +   struct drm_buddy_block *node;
> +
> +	if (list_empty(&mm->free_list[drm_buddy_block_order(block)])) {
> +		list_add(&block->link,
> +			 &mm->free_list[drm_buddy_block_order(block)]);
> +		return;
> +	}
> +
> +	list_for_each_entry(node, &mm->free_list[drm_buddy_block_order(block)], link)
> +		if (block->start > node->start)
> +			break;
> +
> +	__list_add(&block->link, node->link.prev, &node->link);
> +}
> +
> +static void mark_allocated(struct drm_buddy_block *block)
> +{
> +	block->header &= ~DRM_BUDDY_HEADER_STATE;
> +	block->header |= DRM_BUDDY_ALLOCATED;
> +
> +	list_del(&block->link);
> +}
> +
> +static void mark_free(struct drm_buddy_mm *mm,
> +		      struct drm_buddy_block *block)
> +{
> +   block->header &= ~DRM_BUDDY_HEADER_STATE;
> +   block->header |= DRM_BUDDY_FREE;
> +
> +   add_ordered(mm, block);
> +}
> +
> +static void mark_split(struct drm_buddy_block *block)
> +{
> +	block->header &= ~DRM_BUDDY_HEADER_STATE;
> +	block->header |= DRM_BUDDY_SPLIT;
> +
> +	list_del(&block->link);
> +}
> +
> +int drm_buddy_init(struct drm_buddy_mm *mm, u64 size, u64 chunk_size) 
> +{
> +   unsigned int i;
> +   u64 offset;
> +
> +   if (size < chunk_size)
> +   return -E

RE: [PATCH 2/2] Add drm buddy manager support to amdgpu driver

2021-09-22 Thread Paneer Selvam, Arunpravin
[AMD Public Use]

Hi Christian,
Thanks for the review, I will send the next version fixing all the issues.

Regards,
Arun
-Original Message-
From: Christian König  
Sent: Wednesday, September 22, 2021 12:18 PM
To: Paneer Selvam, Arunpravin ; Koenig, 
Christian ; dri-de...@lists.freedesktop.org; 
intel-...@lists.freedesktop.org; amd-gfx@lists.freedesktop.org; 
matthew.a...@intel.com; dan...@ffwll.ch; Deucher, Alexander 

Subject: Re: [PATCH 2/2] Add drm buddy manager support to amdgpu driver

Am 21.09.21 um 17:51 schrieb Paneer Selvam, Arunpravin:
> [AMD Public Use]
>
> Hi Christian,
> Please find my comments.

A better mail client might be helpful for mailing list communication. I use
Thunderbird, but Outlook with appropriate settings should do as well.

>
> Thanks,
> Arun
> -Original Message-
> From: Koenig, Christian 
> Sent: Tuesday, September 21, 2021 2:34 PM
> To: Paneer Selvam, Arunpravin ; 
> dri-de...@lists.freedesktop.org; intel-...@lists.freedesktop.org; 
> amd-gfx@lists.freedesktop.org; matthew.a...@intel.com; 
> dan...@ffwll.ch; Deucher, Alexander 
> Subject: Re: [PATCH 2/2] Add drm buddy manager support to amdgpu 
> driver
>
> Am 20.09.21 um 21:21 schrieb Arunpravin:
> [SNIP]
>> +struct list_head blocks;
>> +};
>> +
>> +static inline struct amdgpu_vram_mgr_node * 
>> +to_amdgpu_vram_mgr_node(struct ttm_resource *res)
>> +{
>> +	return container_of(container_of(res, struct ttm_range_mgr_node, base),
>> +			    struct amdgpu_vram_mgr_node, tnode);
>> +}
>> +
> Maybe stuff that in a separate amdgpu_vram_mgr.h file together with all the 
> other defines for the vram manager.
>
> Arun - I thought about it, will create a new header file for vram 
> manager

Maybe make that a separate patch before this one here.

>> +	if (mode == DRM_BUDDY_ALLOC_RANGE) {
>> +		r = drm_buddy_alloc_range(mm, &node->blocks,
>> +					  (uint64_t)place->fpfn << PAGE_SHIFT,
>> +					  pages << PAGE_SHIFT);
> That handling won't work. It's possible that you need contiguous memory in a 
> specific range.
>
> Arun - the existing default backend range handler allocates contiguous
> nodes in powers of two, finding the MSBs of any given size. We get
> linked nodes (depending on the requested size) in a continuous address
> range. For example, for a 768-page request we get a continuous address
> range of 512 + 256 pages in two nodes.
>
> It works by passing the fpfn and the requested size; the backend handler
> calculates the lpfn as fpfn + size.
> The drawback here is that we are not handling a specific lpfn value (as
> of now it is calculated from fpfn + the requested size) and not following
> the pages_per_node rule.
>
> Please let me know if this won't work for all specific fpfn / lpfn 
> cases

From your description, that sounds like it won't work at all in any of the cases.

See the fpfn/lpfn specifies the range of allocation. For the most common case 
that's either 0..visible_vram or 0..start_of_some_hw_limitation.

When you always try to allocate the range from 0 you will quickly find that you 
clash with existing allocations.

What you need to do in general is to have a drm_buddy_alloc() which is able to 
limit the returned page to the desired range fpfn..lpfn.
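
A minimal sketch of such a range check (assuming block helpers along the
lines of the later drm_buddy patches; illustrative only, not the eventual
implementation):

static bool block_within_range(struct drm_buddy_mm *mm,
			       struct drm_buddy_block *block,
			       u64 fpfn, u64 lpfn)
{
	u64 block_start = drm_buddy_block_offset(block);
	u64 block_end = block_start + drm_buddy_block_size(mm, block);

	/* fpfn/lpfn are page frame numbers; block offsets/sizes are bytes */
	return block_start >= ((u64)fpfn << PAGE_SHIFT) &&
	       block_end <= ((u64)lpfn << PAGE_SHIFT);
}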

>> +
>> +	do {
>> +		unsigned int order;
>> +
>> +		order = fls(n_pages) - 1;
>> +		BUG_ON(order > mm->max_order);
>> +
>> +		spin_lock(&mgr->lock);
>> +		block = drm_buddy_alloc(mm, order, bar_limit_enabled,
>> +					visible_pfn, mode);
> That doesn't seem to make much sense either. The backend allocator should not 
> care about the BAR size nor the visible_pfn.
>
> Arun - we are sending the BAR limit enable information (in the APU or
> large-BAR case we take a different approach) and the visible_pfn
> information.
>
> If bar_limit_enabled is true, I thought visible_pfn was required for
> the backend allocator to compare with the block start address and
> find the desired blocks for the TOP-DOWN and BOTTOM-UP approaches
> (TOP-DOWN - return blocks higher than the visible_pfn limit; BOTTOM-UP -
> return blocks lower than the visible_pfn limit).
>
> If bar_limit_enabled is false, we just return the top-most ordered
> blocks and the bottom-most blocks for TOP-DOWN and BOTTOM-UP respectively
> (suitable for the APU and large-BAR cases).
>
> Please let me know if we have other way to fix this problem

That is the completely wrong approach. The backend mus

RE: [PATCH 2/2] Add drm buddy manager support to amdgpu driver

2021-09-21 Thread Paneer Selvam, Arunpravin
[AMD Public Use]

Hi Christian,
Please find my comments.

Thanks,
Arun
-Original Message-
From: Koenig, Christian  
Sent: Tuesday, September 21, 2021 2:34 PM
To: Paneer Selvam, Arunpravin ; 
dri-de...@lists.freedesktop.org; intel-...@lists.freedesktop.org; 
amd-gfx@lists.freedesktop.org; matthew.a...@intel.com; dan...@ffwll.ch; 
Deucher, Alexander 
Subject: Re: [PATCH 2/2] Add drm buddy manager support to amdgpu driver

Am 20.09.21 um 21:21 schrieb Arunpravin:
> Replace drm_mm with drm buddy manager for VRAM memory management
>
> Signed-off-by: Arunpravin 
> ---
>   .../gpu/drm/amd/amdgpu/amdgpu_res_cursor.h|  78 +--
>   drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h   |   3 +-
>   drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c  | 216 ++
>   3 files changed, 189 insertions(+), 108 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_res_cursor.h 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_res_cursor.h
> index acfa207cf970..ba24052e9062 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_res_cursor.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_res_cursor.h
> @@ -30,12 +30,25 @@
>   #include 
>   #include 
>   
> +struct amdgpu_vram_mgr_node {
> + struct ttm_range_mgr_node tnode;

NAK, don't base this on ttm_range_mgr_node. Instead use ttm_resource.

And please name the member base instead.
Arun - ok

> + struct list_head blocks;
> +};
> +
> +static inline struct amdgpu_vram_mgr_node * 
> +to_amdgpu_vram_mgr_node(struct ttm_resource *res)
> +{
> +	return container_of(container_of(res, struct ttm_range_mgr_node, base),
> +			    struct amdgpu_vram_mgr_node, tnode);
> +}
> +

Maybe stuff that in a separate amdgpu_vram_mgr.h file together with all the 
other defines for the vram manager.

Arun - I thought about it, will create a new header file for vram manager

>   /* state back for walking over vram_mgr and gtt_mgr allocations */
>   struct amdgpu_res_cursor {
>   uint64_tstart;
>   uint64_tsize;
>   uint64_tremaining;
> - struct drm_mm_node  *node;
> + void*node;
> + uint32_tmem_type;
>   };
>   
>   /**
> @@ -52,8 +65,6 @@ static inline void amdgpu_res_first(struct ttm_resource 
> *res,
>   uint64_t start, uint64_t size,
>   struct amdgpu_res_cursor *cur)
>   {
> - struct drm_mm_node *node;
> -
>   if (!res || res->mem_type == TTM_PL_SYSTEM) {
>   cur->start = start;
>   cur->size = size;
> @@ -65,14 +76,39 @@ static inline void amdgpu_res_first(struct 
> ttm_resource *res,
>   
>   BUG_ON(start + size > res->num_pages << PAGE_SHIFT);
>   
> - node = to_ttm_range_mgr_node(res)->mm_nodes;
> - while (start >= node->size << PAGE_SHIFT)
> - start -= node++->size << PAGE_SHIFT;
> + cur->mem_type = res->mem_type;
> +
> + if (cur->mem_type == TTM_PL_VRAM) {

Rather use a switch/case here.
Arun - ok

> + struct drm_buddy_block *block;
> + struct list_head *head, *next;
> +
> +		head = &to_amdgpu_vram_mgr_node(res)->blocks;
> +
> +		block = list_first_entry_or_null(head, struct drm_buddy_block, link);
> +		while (start >= block->size << PAGE_SHIFT) {
> +			start -= block->size << PAGE_SHIFT;
> +
> +			next = block->link.next;
> +			if (next != head)
> +				block = list_entry(next, struct drm_buddy_block, link);
> +		}
>   
> - cur->start = (node->start << PAGE_SHIFT) + start;
> - cur->size = min((node->size << PAGE_SHIFT) - start, size);
> - cur->remaining = size;
> - cur->node = node;
> + cur->start = (block->start << PAGE_SHIFT) + start;
> + cur->size = min((block->size << PAGE_SHIFT) - start, size);
> + cur->remaining = size;
> + cur->node = block;
> + } else if (cur->mem_type == TTM_PL_TT) {
> + struct drm_mm_node *node;
> +
> + node = to_ttm_range_mgr_node(res)->mm_nodes;
> + while (start >= node->size << PAGE_SHIFT)
> + start -= node++->size << PAGE_SHIFT;
> +
> + cur->start = (node->start << PAGE_SHIFT) + start;
> + cur->size = min((node->size << PAGE_SHIFT) - start, size);
> + cur->remaining = size;
> + cur->node = node;

With a defa

RE: [PATCH 1/8] drm/amdgpu: new resource cursor

2021-03-12 Thread Paneer Selvam, Arunpravin
[AMD Public Use]

Hi Christian,
Reviewed the changes, it looks good to me.

Reviewed-by: Arunpravin 

Thanks,
Arun
-Original Message-
From: Christian König  
Sent: Friday, March 12, 2021 4:22 PM
To: Paneer Selvam, Arunpravin ; 
amd-gfx@lists.freedesktop.org
Subject: Re: [PATCH 1/8] drm/amdgpu: new resource cursor

Any more comments on this set? Otherwise I'm going to push it with just
Oak's ack.

Thanks,
Christian.

Am 08.03.21 um 14:40 schrieb Christian König:
> Allows walking over the drm_mm nodes in a TTM resource object.
>
> Signed-off-by: Christian König 
> Acked-by: Oak Zeng 
> Tested-by: Nirmoy Das 
> ---
>   .../gpu/drm/amd/amdgpu/amdgpu_res_cursor.h| 105 ++
>   1 file changed, 105 insertions(+)
>   create mode 100644 drivers/gpu/drm/amd/amdgpu/amdgpu_res_cursor.h
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_res_cursor.h 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_res_cursor.h
> new file mode 100644
> index ..1335e098510f
> --- /dev/null
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_res_cursor.h
> @@ -0,0 +1,105 @@
> +// SPDX-License-Identifier: GPL-2.0 OR MIT
> +/*
> + * Copyright 2020 Advanced Micro Devices, Inc.
> + *
> + * Permission is hereby granted, free of charge, to any person 
> +obtaining a
> + * copy of this software and associated documentation files (the 
> +"Software"),
> + * to deal in the Software without restriction, including without 
> +limitation
> + * the rights to use, copy, modify, merge, publish, distribute, 
> +sublicense,
> + * and/or sell copies of the Software, and to permit persons to whom 
> +the
> + * Software is furnished to do so, subject to the following conditions:
> + *
> + * The above copyright notice and this permission notice shall be 
> +included in
> + * all copies or substantial portions of the Software.
> + *
> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
> +EXPRESS OR
> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 
> +MERCHANTABILITY,
> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT 
> +SHALL
> + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, 
> +DAMAGES OR
> + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR 
> +OTHERWISE,
> + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE 
> +OR
> + * OTHER DEALINGS IN THE SOFTWARE.
> + *
> + * Authors: Christian König
> + */
> +
> +#ifndef __AMDGPU_RES_CURSOR_H__
> +#define __AMDGPU_RES_CURSOR_H__
> +
> +#include 
> +#include 
> +
> +/* state back for walking over vram_mgr and gtt_mgr allocations */ 
> +struct amdgpu_res_cursor {
> + uint64_tstart;
> + uint64_tsize;
> + uint64_tremaining;
> + struct drm_mm_node  *node;
> +};
> +
> +/**
> + * amdgpu_res_first - initialize a amdgpu_res_cursor
> + *
> + * @res: TTM resource object to walk
> + * @start: Start of the range
> + * @size: Size of the range
> + * @cur: cursor object to initialize
> + *
> + * Start walking over the range of allocations between @start and @size.
> + */
> +static inline void amdgpu_res_first(struct ttm_resource *res,
> + uint64_t start, uint64_t size,
> +				    struct amdgpu_res_cursor *cur)
> +{
> + struct drm_mm_node *node;
> +
> + if (!res || !res->mm_node) {
> + cur->start = start;
> + cur->size = size;
> + cur->remaining = size;
> + cur->node = NULL;
> + return;
> + }
> +
> + BUG_ON(start + size > res->num_pages << PAGE_SHIFT);
> +
> + node = res->mm_node;
> + while (start > node->size << PAGE_SHIFT)
> + start -= node++->size << PAGE_SHIFT;
> +
> + cur->start = (node->start << PAGE_SHIFT) + start;
> + cur->size = (node->size << PAGE_SHIFT) - start;
> + cur->remaining = size;
> + cur->node = node;
> +}
> +
> +/**
> + * amdgpu_res_next - advance the cursor
> + *
> + * @cur: the cursor to advance
> + * @size: number of bytes to move forward
> + *
> + * Move the cursor @size bytes forward, walking to the next node if necessary.
> + */
> +static inline void amdgpu_res_next(struct amdgpu_res_cursor *cur, uint64_t size)
> +{
> + struct drm_mm_node *node = cur->node;
> +
> + BUG_ON(size > cur->remaining);
> +
> + cur->remaining -= size;
> + if (!cur->remaining)
> + return;
> +
> + cur->size -= size;
> + if (cur->size) {
> + cur->start += size;
> + return;
> + }
> +
> + cur->node = ++node;
> + cur->start = node->start << PAGE_SHIFT;
> +	cur->size = min(node->size << PAGE_SHIFT, cur->remaining);
> +}
> +
> +#endif
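
For reference, typical usage of the cursor pair looks roughly like this
(a sketch; res, offset, size and the per-chunk work are placeholders):

	struct amdgpu_res_cursor cursor;

	amdgpu_res_first(res, offset, size, &cursor);
	while (cursor.remaining) {
		/* clamp the work to the current contiguous piece */
		u64 len = min(cursor.size, cursor.remaining);

		/* ... operate on VRAM at byte offset cursor.start for len bytes ... */

		amdgpu_res_next(&cursor, len);
	}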
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


RE: [PATCH] drm/amd/pm/swsmu: clean up user profile function

2021-03-01 Thread Paneer Selvam, Arunpravin
[AMD Public Use]



-Original Message-
From: Quan, Evan  
Sent: Tuesday, March 2, 2021 6:54 AM
To: Paneer Selvam, Arunpravin ; 
amd-gfx@lists.freedesktop.org
Cc: Deucher, Alexander ; Lazar, Lijo 
; Paneer Selvam, Arunpravin 

Subject: RE: [PATCH] drm/amd/pm/swsmu: clean up user profile function

[AMD Official Use Only - Internal Distribution Only]



-Original Message-
From: amd-gfx  On Behalf Of Arunpravin
Sent: Tuesday, March 2, 2021 2:51 AM
To: amd-gfx@lists.freedesktop.org
Cc: Deucher, Alexander ; Lazar, Lijo 
; Paneer Selvam, Arunpravin 

Subject: [PATCH] drm/amd/pm/swsmu: clean up user profile function

Remove unnecessary comments, enable restore mode using the '|=' operator, and
fix the alignment to improve code readability.

Signed-off-by: Arunpravin 
---
 drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c | 22 ++
 1 file changed, 6 insertions(+), 16 deletions(-)

diff --git a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c 
b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c
index f5d9590f2178..7d7ef4fa2887 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c
@@ -315,35 +315,25 @@ static void smu_set_user_clk_dependencies(struct 
smu_context *smu, enum smu_clk_
if (smu->adev->in_suspend)
return;
 
-   /*
-* mclk, fclk and socclk are interdependent
-* on each other
-*/
if (clk == SMU_MCLK) {
-   /* reset clock dependency */
smu->user_dpm_profile.clk_dependency = 0;
-   /* set mclk dependent clocks(fclk and socclk) */
smu->user_dpm_profile.clk_dependency = BIT(SMU_FCLK) | 
BIT(SMU_SOCCLK);
} else if (clk == SMU_FCLK) {
-   /* give priority to mclk, if mclk dependent clocks are set */
+   /* MCLK takes precedence over FCLK */
if (smu->user_dpm_profile.clk_dependency == (BIT(SMU_FCLK) | 
BIT(SMU_SOCCLK)))
return;
 
-   /* reset clock dependency */
smu->user_dpm_profile.clk_dependency = 0;
-   /* set fclk dependent clocks(mclk and socclk) */
smu->user_dpm_profile.clk_dependency = BIT(SMU_MCLK) | 
BIT(SMU_SOCCLK);
} else if (clk == SMU_SOCCLK) {
-   /* give priority to mclk, if mclk dependent clocks are set */
+   /* MCLK takes precedence over SOCCLK */
if (smu->user_dpm_profile.clk_dependency == (BIT(SMU_FCLK) | 
BIT(SMU_SOCCLK)))
return;
 
-   /* reset clock dependency */
smu->user_dpm_profile.clk_dependency = 0;
-   /* set socclk dependent clocks(mclk and fclk) */
smu->user_dpm_profile.clk_dependency = BIT(SMU_MCLK) | 
BIT(SMU_FCLK);
} else
-   /* add clk dependencies here, if any */
+   /* Add clk dependencies here, if any */
return;
 }
 
@@ -367,7 +357,7 @@ static void smu_restore_dpm_user_profile(struct smu_context 
*smu)
return;
 
/* Enable restore flag */
-   smu->user_dpm_profile.flags = SMU_DPM_USER_PROFILE_RESTORE;
+   smu->user_dpm_profile.flags |= SMU_DPM_USER_PROFILE_RESTORE;
[Quan, Evan] You may need to change other checks (e.g.
"smu->user_dpm_profile.flags != SMU_DPM_USER_PROFILE_RESTORE") to use "&"
(smu->user_dpm_profile.flags & SMU_DPM_USER_PROFILE_RESTORE) to pair with this.

Evan

[Arun] I missed it, will fix.
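
The set/test pairing Evan describes, as a quick sketch:

	/* set: preserve the other flag bits */
	smu->user_dpm_profile.flags |= SMU_DPM_USER_PROFILE_RESTORE;

	/* test: check the bit, not equality against the whole field */
	if (smu->user_dpm_profile.flags & SMU_DPM_USER_PROFILE_RESTORE)
		/* ... restore path ... */;

	/* clear once the restore is done */
	smu->user_dpm_profile.flags &= ~SMU_DPM_USER_PROFILE_RESTORE;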

Thanks,
Arun
 
/* set the user dpm power limit */
	if (smu->user_dpm_profile.power_limit) {
@@ -390,8 +380,8 @@ static void smu_restore_dpm_user_profile(struct smu_context *smu)
 			ret = smu_force_smuclk_levels(smu, clk_type,
 						      smu->user_dpm_profile.clk_mask[clk_type]);
if (ret)
-   dev_err(smu->adev->dev, "Failed to set 
clock type = %d\n",
-   clk_type);
+   dev_err(smu->adev->dev,
+   "Failed to set clock type = 
%d\n", clk_type);
}
}
}
--
2.17.1

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx