Re: [PATCH v9 2/6] drm: improve drm_buddy_alloc function

2022-01-26 Thread Arunpravin



On 21/01/22 5:30 pm, Matthew Auld wrote:
> On 19/01/2022 11:37, Arunpravin wrote:
>> - Make drm_buddy_alloc a single function to handle
>>range allocation and non-range allocation demands
>>
>> - Implemented a new function alloc_range() which allocates
>>the requested power-of-two block comply with range limitations
>>
>> - Moved order computation and memory alignment logic from
>>i915 driver to drm buddy
>>
>> v2:
>>merged below changes to keep the build unbroken
>> - drm_buddy_alloc_range() becomes obsolete and may be removed
>> - enable ttm range allocation (fpfn / lpfn) support in i915 driver
>> - apply enhanced drm_buddy_alloc() function to i915 driver
>>
>> v3(Matthew Auld):
>>- Fix alignment issues and remove unnecessary list_empty check
>>- add more validation checks for input arguments
>>- make alloc_range() block allocations as bottom-up
>>- optimize order computation logic
>>- replace uint64_t with u64, which is preferred in the kernel
>>
>> v4(Matthew Auld):
>>- keep drm_buddy_alloc_range() function implementation for generic
>>  actual range allocations
>>- keep alloc_range() implementation for end bias allocations
>>
>> v5(Matthew Auld):
>>- modify drm_buddy_alloc() passing argument place->lpfn to lpfn
>>  as place->lpfn will currently always be zero for i915
>>
>> v6(Matthew Auld):
>>- fixup potential uaf - If we are unlucky and can't allocate
>>  enough memory when splitting blocks, where we temporarily
>>  end up with the given block and its buddy on the respective
>>  free list, then we need to ensure we delete both blocks,
>>  and no just the buddy, before potentially freeing them
> 
> Hmm, not sure we really want to squash existing bug fixes into this 
> patch. Perhaps bring in [1] to the start of your series? i915_buddy is 
> gone now. Alternatively I can resend such that it applies on top 
> drm_buddy. Your choice.
> 
> [1] 
> https://nam11.safelinks.protection.outlook.com/?url=https%3A%2F%2Fpatchwork.freedesktop.org%2Fpatch%2F469806%2F%3Fseries%3D98953%26rev%3D1data=04%7C01%7CArunpravin.PaneerSelvam%40amd.com%7Ce451a48532e74b6c138408d9dcd5aef0%7C3dd8961fe4884e608e11a82d994e183d%7C0%7C0%7C637783632587526317%7CUnknown%7CTWFpbGZsb3d8eyJWIjoiMC4wLjAwMDAiLCJQIjoiV2luMzIiLCJBTiI6Ik1haWwiLCJXVCI6Mn0%3D%7C3000sdata=0UoMVUwlwpu8AbB%2BgJrmRBEc7VPt8aAcraRnWkU83ag%3Dreserved=0
> 

I will revert this fix in v10, please resend on top drm_buddy
>>
>>- fix warnings reported by kernel test robot 
>>
>> Signed-off-by: Arunpravin 
>> ---
>>   drivers/gpu/drm/drm_buddy.c   | 326 +-
>>   drivers/gpu/drm/i915/i915_ttm_buddy_manager.c |  67 ++--
>>   drivers/gpu/drm/i915/i915_ttm_buddy_manager.h |   2 +
>>   include/drm/drm_buddy.h   |  22 +-
>>   4 files changed, 293 insertions(+), 124 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/drm_buddy.c b/drivers/gpu/drm/drm_buddy.c
>> index d60878bc9c20..954e31962c74 100644
>> --- a/drivers/gpu/drm/drm_buddy.c
>> +++ b/drivers/gpu/drm/drm_buddy.c
>> @@ -282,23 +282,99 @@ void drm_buddy_free_list(struct drm_buddy *mm, struct 
>> list_head *objects)
>>   }
>>   EXPORT_SYMBOL(drm_buddy_free_list);
>>   
>> -/**
>> - * drm_buddy_alloc_blocks - allocate power-of-two blocks
>> - *
>> - * @mm: DRM buddy manager to allocate from
>> - * @order: size of the allocation
>> - *
>> - * The order value here translates to:
>> - *
>> - * 0 = 2^0 * mm->chunk_size
>> - * 1 = 2^1 * mm->chunk_size
>> - * 2 = 2^2 * mm->chunk_size
>> - *
>> - * Returns:
>> - * allocated ptr to the _buddy_block on success
>> - */
>> -struct drm_buddy_block *
>> -drm_buddy_alloc_blocks(struct drm_buddy *mm, unsigned int order)
>> +static inline bool overlaps(u64 s1, u64 e1, u64 s2, u64 e2)
>> +{
>> +return s1 <= e2 && e1 >= s2;
>> +}
>> +
>> +static inline bool contains(u64 s1, u64 e1, u64 s2, u64 e2)
>> +{
>> +return s1 <= s2 && e1 >= e2;
>> +}
>> +
>> +static struct drm_buddy_block *
>> +alloc_range_bias(struct drm_buddy *mm,
>> + u64 start, u64 end,
>> + unsigned int order)
>> +{
>> +struct drm_buddy_block *block;
>> +struct drm_buddy_block *buddy;
>> +LIST_HEAD(dfs);
>> +int err;
>> +int i;
>> +
>> +end = end - 1;
>> +
>> +for (i = 0; i < mm->n_roots; ++i)
>> +list_add_tail(>roots[i]->tmp_link, );
>> +
>> +do {
>> +u64 block_start;
>> +u64 block_end;
>> +
>> +block = list_first_entry_or_null(,
>> + struct drm_buddy_block,
>> + tmp_link);
>> +if (!block)
>> +break;
>> +
>> +list_del(>tmp_link);
>> +
>> +if (drm_buddy_block_order(block) < order)
>> +continue;
>> +
>> +block_start = drm_buddy_block_offset(block);
>> +block_end = block_start + 

Re: [PATCH v9 2/6] drm: improve drm_buddy_alloc function

2022-01-21 Thread Matthew Auld

On 19/01/2022 11:37, Arunpravin wrote:

- Make drm_buddy_alloc a single function to handle
   range allocation and non-range allocation demands

- Implemented a new function alloc_range() which allocates
   the requested power-of-two block comply with range limitations

- Moved order computation and memory alignment logic from
   i915 driver to drm buddy

v2:
   merged below changes to keep the build unbroken
- drm_buddy_alloc_range() becomes obsolete and may be removed
- enable ttm range allocation (fpfn / lpfn) support in i915 driver
- apply enhanced drm_buddy_alloc() function to i915 driver

v3(Matthew Auld):
   - Fix alignment issues and remove unnecessary list_empty check
   - add more validation checks for input arguments
   - make alloc_range() block allocations as bottom-up
   - optimize order computation logic
   - replace uint64_t with u64, which is preferred in the kernel

v4(Matthew Auld):
   - keep drm_buddy_alloc_range() function implementation for generic
 actual range allocations
   - keep alloc_range() implementation for end bias allocations

v5(Matthew Auld):
   - modify drm_buddy_alloc() passing argument place->lpfn to lpfn
 as place->lpfn will currently always be zero for i915

v6(Matthew Auld):
   - fixup potential uaf - If we are unlucky and can't allocate
 enough memory when splitting blocks, where we temporarily
 end up with the given block and its buddy on the respective
 free list, then we need to ensure we delete both blocks,
 and no just the buddy, before potentially freeing them


Hmm, not sure we really want to squash existing bug fixes into this 
patch. Perhaps bring in [1] to the start of your series? i915_buddy is 
gone now. Alternatively I can resend such that it applies on top 
drm_buddy. Your choice.


[1] https://patchwork.freedesktop.org/patch/469806/?series=98953=1



   - fix warnings reported by kernel test robot 

Signed-off-by: Arunpravin 
---
  drivers/gpu/drm/drm_buddy.c   | 326 +-
  drivers/gpu/drm/i915/i915_ttm_buddy_manager.c |  67 ++--
  drivers/gpu/drm/i915/i915_ttm_buddy_manager.h |   2 +
  include/drm/drm_buddy.h   |  22 +-
  4 files changed, 293 insertions(+), 124 deletions(-)

diff --git a/drivers/gpu/drm/drm_buddy.c b/drivers/gpu/drm/drm_buddy.c
index d60878bc9c20..954e31962c74 100644
--- a/drivers/gpu/drm/drm_buddy.c
+++ b/drivers/gpu/drm/drm_buddy.c
@@ -282,23 +282,99 @@ void drm_buddy_free_list(struct drm_buddy *mm, struct 
list_head *objects)
  }
  EXPORT_SYMBOL(drm_buddy_free_list);
  
-/**

- * drm_buddy_alloc_blocks - allocate power-of-two blocks
- *
- * @mm: DRM buddy manager to allocate from
- * @order: size of the allocation
- *
- * The order value here translates to:
- *
- * 0 = 2^0 * mm->chunk_size
- * 1 = 2^1 * mm->chunk_size
- * 2 = 2^2 * mm->chunk_size
- *
- * Returns:
- * allocated ptr to the _buddy_block on success
- */
-struct drm_buddy_block *
-drm_buddy_alloc_blocks(struct drm_buddy *mm, unsigned int order)
+static inline bool overlaps(u64 s1, u64 e1, u64 s2, u64 e2)
+{
+   return s1 <= e2 && e1 >= s2;
+}
+
+static inline bool contains(u64 s1, u64 e1, u64 s2, u64 e2)
+{
+   return s1 <= s2 && e1 >= e2;
+}
+
+static struct drm_buddy_block *
+alloc_range_bias(struct drm_buddy *mm,
+u64 start, u64 end,
+unsigned int order)
+{
+   struct drm_buddy_block *block;
+   struct drm_buddy_block *buddy;
+   LIST_HEAD(dfs);
+   int err;
+   int i;
+
+   end = end - 1;
+
+   for (i = 0; i < mm->n_roots; ++i)
+   list_add_tail(>roots[i]->tmp_link, );
+
+   do {
+   u64 block_start;
+   u64 block_end;
+
+   block = list_first_entry_or_null(,
+struct drm_buddy_block,
+tmp_link);
+   if (!block)
+   break;
+
+   list_del(>tmp_link);
+
+   if (drm_buddy_block_order(block) < order)
+   continue;
+
+   block_start = drm_buddy_block_offset(block);
+   block_end = block_start + drm_buddy_block_size(mm, block) - 1;
+
+   if (!overlaps(start, end, block_start, block_end))
+   continue;
+
+   if (drm_buddy_block_is_allocated(block))
+   continue;
+
+   if (contains(start, end, block_start, block_end) &&
+   order == drm_buddy_block_order(block)) {
+   /*
+* Find the free block within the range.
+*/
+   if (drm_buddy_block_is_free(block))
+   return block;
+
+   continue;
+   }
+
+   if (!drm_buddy_block_is_split(block)) {
+   err = split_block(mm, block);
+   if 

[PATCH v9 2/6] drm: improve drm_buddy_alloc function

2022-01-19 Thread Arunpravin
- Make drm_buddy_alloc a single function to handle
  range allocation and non-range allocation demands

- Implemented a new function alloc_range() which allocates
  the requested power-of-two block comply with range limitations

- Moved order computation and memory alignment logic from
  i915 driver to drm buddy

v2:
  merged below changes to keep the build unbroken
   - drm_buddy_alloc_range() becomes obsolete and may be removed
   - enable ttm range allocation (fpfn / lpfn) support in i915 driver
   - apply enhanced drm_buddy_alloc() function to i915 driver

v3(Matthew Auld):
  - Fix alignment issues and remove unnecessary list_empty check
  - add more validation checks for input arguments
  - make alloc_range() block allocations as bottom-up
  - optimize order computation logic
  - replace uint64_t with u64, which is preferred in the kernel

v4(Matthew Auld):
  - keep drm_buddy_alloc_range() function implementation for generic
actual range allocations
  - keep alloc_range() implementation for end bias allocations

v5(Matthew Auld):
  - modify drm_buddy_alloc() passing argument place->lpfn to lpfn
as place->lpfn will currently always be zero for i915

v6(Matthew Auld):
  - fixup potential uaf - If we are unlucky and can't allocate
enough memory when splitting blocks, where we temporarily
end up with the given block and its buddy on the respective
free list, then we need to ensure we delete both blocks,
and no just the buddy, before potentially freeing them

  - fix warnings reported by kernel test robot 

Signed-off-by: Arunpravin 
---
 drivers/gpu/drm/drm_buddy.c   | 326 +-
 drivers/gpu/drm/i915/i915_ttm_buddy_manager.c |  67 ++--
 drivers/gpu/drm/i915/i915_ttm_buddy_manager.h |   2 +
 include/drm/drm_buddy.h   |  22 +-
 4 files changed, 293 insertions(+), 124 deletions(-)

diff --git a/drivers/gpu/drm/drm_buddy.c b/drivers/gpu/drm/drm_buddy.c
index d60878bc9c20..954e31962c74 100644
--- a/drivers/gpu/drm/drm_buddy.c
+++ b/drivers/gpu/drm/drm_buddy.c
@@ -282,23 +282,99 @@ void drm_buddy_free_list(struct drm_buddy *mm, struct 
list_head *objects)
 }
 EXPORT_SYMBOL(drm_buddy_free_list);
 
-/**
- * drm_buddy_alloc_blocks - allocate power-of-two blocks
- *
- * @mm: DRM buddy manager to allocate from
- * @order: size of the allocation
- *
- * The order value here translates to:
- *
- * 0 = 2^0 * mm->chunk_size
- * 1 = 2^1 * mm->chunk_size
- * 2 = 2^2 * mm->chunk_size
- *
- * Returns:
- * allocated ptr to the _buddy_block on success
- */
-struct drm_buddy_block *
-drm_buddy_alloc_blocks(struct drm_buddy *mm, unsigned int order)
+static inline bool overlaps(u64 s1, u64 e1, u64 s2, u64 e2)
+{
+   return s1 <= e2 && e1 >= s2;
+}
+
+static inline bool contains(u64 s1, u64 e1, u64 s2, u64 e2)
+{
+   return s1 <= s2 && e1 >= e2;
+}
+
+static struct drm_buddy_block *
+alloc_range_bias(struct drm_buddy *mm,
+u64 start, u64 end,
+unsigned int order)
+{
+   struct drm_buddy_block *block;
+   struct drm_buddy_block *buddy;
+   LIST_HEAD(dfs);
+   int err;
+   int i;
+
+   end = end - 1;
+
+   for (i = 0; i < mm->n_roots; ++i)
+   list_add_tail(>roots[i]->tmp_link, );
+
+   do {
+   u64 block_start;
+   u64 block_end;
+
+   block = list_first_entry_or_null(,
+struct drm_buddy_block,
+tmp_link);
+   if (!block)
+   break;
+
+   list_del(>tmp_link);
+
+   if (drm_buddy_block_order(block) < order)
+   continue;
+
+   block_start = drm_buddy_block_offset(block);
+   block_end = block_start + drm_buddy_block_size(mm, block) - 1;
+
+   if (!overlaps(start, end, block_start, block_end))
+   continue;
+
+   if (drm_buddy_block_is_allocated(block))
+   continue;
+
+   if (contains(start, end, block_start, block_end) &&
+   order == drm_buddy_block_order(block)) {
+   /*
+* Find the free block within the range.
+*/
+   if (drm_buddy_block_is_free(block))
+   return block;
+
+   continue;
+   }
+
+   if (!drm_buddy_block_is_split(block)) {
+   err = split_block(mm, block);
+   if (unlikely(err))
+   goto err_undo;
+   }
+
+   list_add(>right->tmp_link, );
+   list_add(>left->tmp_link, );
+   } while (1);
+
+   return ERR_PTR(-ENOSPC);
+
+err_undo:
+   /*
+* We really don't want to leave around a bunch of split blocks, since
+* bigger is better, so make sure we merge 

[PATCH v9 2/6] drm: improve drm_buddy_alloc function

2022-01-18 Thread Arunpravin
- Make drm_buddy_alloc a single function to handle
  range allocation and non-range allocation demands

- Implemented a new function alloc_range() which allocates
  the requested power-of-two block comply with range limitations

- Moved order computation and memory alignment logic from
  i915 driver to drm buddy

v2:
  merged below changes to keep the build unbroken
   - drm_buddy_alloc_range() becomes obsolete and may be removed
   - enable ttm range allocation (fpfn / lpfn) support in i915 driver
   - apply enhanced drm_buddy_alloc() function to i915 driver

v3(Matthew Auld):
  - Fix alignment issues and remove unnecessary list_empty check
  - add more validation checks for input arguments
  - make alloc_range() block allocations as bottom-up
  - optimize order computation logic
  - replace uint64_t with u64, which is preferred in the kernel

v4(Matthew Auld):
  - keep drm_buddy_alloc_range() function implementation for generic
actual range allocations
  - keep alloc_range() implementation for end bias allocations

v5(Matthew Auld):
  - modify drm_buddy_alloc() passing argument place->lpfn to lpfn
as place->lpfn will currently always be zero for i915

v6(Matthew Auld):
  - fixup potential uaf - If we are unlucky and can't allocate
enough memory when splitting blocks, where we temporarily
end up with the given block and its buddy on the respective
free list, then we need to ensure we delete both blocks,
and no just the buddy, before potentially freeing them

  - fix warnings reported by kernel test robot 

Signed-off-by: Arunpravin 
---
 drivers/gpu/drm/drm_buddy.c   | 326 +-
 drivers/gpu/drm/i915/i915_ttm_buddy_manager.c |  67 ++--
 drivers/gpu/drm/i915/i915_ttm_buddy_manager.h |   2 +
 include/drm/drm_buddy.h   |  22 +-
 4 files changed, 293 insertions(+), 124 deletions(-)

diff --git a/drivers/gpu/drm/drm_buddy.c b/drivers/gpu/drm/drm_buddy.c
index d60878bc9c20..954e31962c74 100644
--- a/drivers/gpu/drm/drm_buddy.c
+++ b/drivers/gpu/drm/drm_buddy.c
@@ -282,23 +282,99 @@ void drm_buddy_free_list(struct drm_buddy *mm, struct 
list_head *objects)
 }
 EXPORT_SYMBOL(drm_buddy_free_list);
 
-/**
- * drm_buddy_alloc_blocks - allocate power-of-two blocks
- *
- * @mm: DRM buddy manager to allocate from
- * @order: size of the allocation
- *
- * The order value here translates to:
- *
- * 0 = 2^0 * mm->chunk_size
- * 1 = 2^1 * mm->chunk_size
- * 2 = 2^2 * mm->chunk_size
- *
- * Returns:
- * allocated ptr to the _buddy_block on success
- */
-struct drm_buddy_block *
-drm_buddy_alloc_blocks(struct drm_buddy *mm, unsigned int order)
+static inline bool overlaps(u64 s1, u64 e1, u64 s2, u64 e2)
+{
+   return s1 <= e2 && e1 >= s2;
+}
+
+static inline bool contains(u64 s1, u64 e1, u64 s2, u64 e2)
+{
+   return s1 <= s2 && e1 >= e2;
+}
+
+static struct drm_buddy_block *
+alloc_range_bias(struct drm_buddy *mm,
+u64 start, u64 end,
+unsigned int order)
+{
+   struct drm_buddy_block *block;
+   struct drm_buddy_block *buddy;
+   LIST_HEAD(dfs);
+   int err;
+   int i;
+
+   end = end - 1;
+
+   for (i = 0; i < mm->n_roots; ++i)
+   list_add_tail(>roots[i]->tmp_link, );
+
+   do {
+   u64 block_start;
+   u64 block_end;
+
+   block = list_first_entry_or_null(,
+struct drm_buddy_block,
+tmp_link);
+   if (!block)
+   break;
+
+   list_del(>tmp_link);
+
+   if (drm_buddy_block_order(block) < order)
+   continue;
+
+   block_start = drm_buddy_block_offset(block);
+   block_end = block_start + drm_buddy_block_size(mm, block) - 1;
+
+   if (!overlaps(start, end, block_start, block_end))
+   continue;
+
+   if (drm_buddy_block_is_allocated(block))
+   continue;
+
+   if (contains(start, end, block_start, block_end) &&
+   order == drm_buddy_block_order(block)) {
+   /*
+* Find the free block within the range.
+*/
+   if (drm_buddy_block_is_free(block))
+   return block;
+
+   continue;
+   }
+
+   if (!drm_buddy_block_is_split(block)) {
+   err = split_block(mm, block);
+   if (unlikely(err))
+   goto err_undo;
+   }
+
+   list_add(>right->tmp_link, );
+   list_add(>left->tmp_link, );
+   } while (1);
+
+   return ERR_PTR(-ENOSPC);
+
+err_undo:
+   /*
+* We really don't want to leave around a bunch of split blocks, since
+* bigger is better, so make sure we merge