On Thu, 28 Aug 2025, Arunpravin Paneer Selvam <arunpravin.paneersel...@amd.com> wrote:
> Replace the freelist (O(n)) used for free block management with a
> red-black tree, providing more efficient O(log n) search, insert,
> and delete operations. This improves scalability and performance
> when managing large numbers of free blocks per order (e.g., hundreds
> or thousands).
>
> In the VK-CTS memory stress subtest, the buddy manager merges
> fragmented memory and inserts freed blocks into the freelist. Since
> freelist insertion is O(n), this becomes a bottleneck as fragmentation
> increases. Benchmarking shows list_insert_sorted() consumes ~52.69% CPU
> with the freelist, compared to just 0.03% with the RB tree
> (rbtree_insert.isra.0), despite performing the same sorted insert.
>
> This also improves performance in heavily fragmented workloads,
> such as games or graphics tests that stress memory.
>
> v3(Matthew):
>   - Remove RB_EMPTY_NODE check in force_merge function.
>   - Rename rb for loop macros to have less generic names and move to
>     .c file.
>   - Make the rb node "rb" and the list head "link" fields a union.
>
> Signed-off-by: Arunpravin Paneer Selvam <arunpravin.paneersel...@amd.com>
> ---
>  drivers/gpu/drm/drm_buddy.c | 177 +++++++++++++++++++++++++-----------
>  include/drm/drm_buddy.h     |   9 +-
>  2 files changed, 131 insertions(+), 55 deletions(-)
>
> diff --git a/drivers/gpu/drm/drm_buddy.c b/drivers/gpu/drm/drm_buddy.c
> index a94061f373de..01ae984340cc 100644
> --- a/drivers/gpu/drm/drm_buddy.c
> +++ b/drivers/gpu/drm/drm_buddy.c
> @@ -14,6 +14,41 @@
>  
>  static struct kmem_cache *slab_blocks;
>  
> +/*

If you're making them kernel-doc comments, this should be "/**".
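I.e. the kernel-doc tooling only picks up comments that open with the
double asterisk:

/**
 * for_each_rb_free_block() - iterate over an RB tree in order
 * ...
 */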

> + * for_each_rb_free_block() - iterate over an RB tree in order
> + * @pos:     the struct type * to use as a loop cursor
> + * @root:    pointer to struct rb_root to iterate
> + * @member:  name of the rb_node field within the struct
> + */
> +#define for_each_rb_free_block(pos, root, member) \
> +     for (pos = rb_entry_safe(rb_first(root), typeof(*pos), member); \
> +          pos; \
> +          pos = rb_entry_safe(rb_next(&(pos)->member), typeof(*pos), member))
> +
> +/*
> + * for_each_rb_free_block_reverse() - iterate over an RB tree in reverse order
> + * @pos:     the struct type * to use as a loop cursor
> + * @root:    pointer to struct rb_root to iterate
> + * @member:  name of the rb_node field within the struct
> + */
> +#define for_each_rb_free_block_reverse(pos, root, member) \
> +     for (pos = rb_entry_safe(rb_last(root), typeof(*pos), member); \
> +          pos; \
> +          pos = rb_entry_safe(rb_prev(&(pos)->member), typeof(*pos), member))
> +
> +/**
> + * for_each_rb_free_block_reverse_safe() - safely iterate over an RB tree in reverse order
> + * @pos:     the struct type * to use as a loop cursor.
> + * @n:       another struct type * to use as temporary storage.
> + * @root:    pointer to struct rb_root to iterate.
> + * @member:  name of the rb_node field within the struct.
> + */
> +#define for_each_rb_free_block_reverse_safe(pos, n, root, member) \
> +     for (pos = rb_entry_safe(rb_last(root), typeof(*pos), member), \
> +          n = pos ? rb_entry_safe(rb_prev(&(pos)->member), typeof(*pos), member) : NULL; \
> +          pos; \
> +          pos = n, n = pos ? rb_entry_safe(rb_prev(&(pos)->member), typeof(*pos), member) : NULL)
> +

The macro args need a bunch of parens around them to ensure correct
precedence.

But more importantly, why are these macros being added here instead of
rbtree.h? This just creates technical debt.
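Something along these lines, perhaps (untested sketch, reusing the names
from the patch):

#define for_each_rb_free_block(pos, root, member) \
	for ((pos) = rb_entry_safe(rb_first(root), typeof(*(pos)), member); \
	     (pos); \
	     (pos) = rb_entry_safe(rb_next(&(pos)->member), typeof(*(pos)), member))

(member can't be parenthesized, since it's used as a struct field name.)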

>  static struct drm_buddy_block *drm_block_alloc(struct drm_buddy *mm,
>                                              struct drm_buddy_block *parent,
>                                              unsigned int order,
> @@ -31,6 +66,8 @@ static struct drm_buddy_block *drm_block_alloc(struct drm_buddy *mm,
>       block->header |= order;
>       block->parent = parent;
>  
> +     RB_CLEAR_NODE(&block->rb);
> +
>       BUG_ON(block->header & DRM_BUDDY_HEADER_UNUSED);
>       return block;
>  }
> @@ -41,23 +78,53 @@ static void drm_block_free(struct drm_buddy *mm,
>       kmem_cache_free(slab_blocks, block);
>  }
>  
> -static void list_insert_sorted(struct drm_buddy *mm,
> -                            struct drm_buddy_block *block)
> +static void rbtree_insert(struct drm_buddy *mm,
> +                       struct drm_buddy_block *block)
>  {
> +     struct rb_root *root = &mm->free_tree[drm_buddy_block_order(block)];
> +     struct rb_node **link = &root->rb_node;
> +     struct rb_node *parent = NULL;
>       struct drm_buddy_block *node;
> -     struct list_head *head;
> +     u64 offset;
> +
> +     offset = drm_buddy_block_offset(block);
>  
> -     head = &mm->free_list[drm_buddy_block_order(block)];
> -     if (list_empty(head)) {
> -             list_add(&block->link, head);
> -             return;
> +     while (*link) {
> +             parent = *link;
> +             node = rb_entry(parent, struct drm_buddy_block, rb);
> +
> +             if (offset < drm_buddy_block_offset(node))
> +                     link = &parent->rb_left;
> +             else
> +                     link = &parent->rb_right;
>       }
>  
> -     list_for_each_entry(node, head, link)
> -             if (drm_buddy_block_offset(block) < drm_buddy_block_offset(node))
> -                     break;
> +     rb_link_node(&block->rb, parent, link);
> +     rb_insert_color(&block->rb, root);
> +}
>  
> -     __list_add(&block->link, node->link.prev, &node->link);
> +static void rbtree_remove(struct drm_buddy *mm,
> +                       struct drm_buddy_block *block)
> +{
> +     struct rb_root *root;
> +
> +     root = &mm->free_tree[drm_buddy_block_order(block)];
> +     rb_erase(&block->rb, root);
> +
> +     RB_CLEAR_NODE(&block->rb);
> +}
> +
> +static inline struct drm_buddy_block *
> +rbtree_last_entry(struct drm_buddy *mm, unsigned int order)
> +{
> +     struct rb_node *node = rb_last(&mm->free_tree[order]);
> +
> +     return node ? rb_entry(node, struct drm_buddy_block, rb) : NULL;
> +}
> +
> +static bool rbtree_is_empty(struct drm_buddy *mm, unsigned int order)
> +{
> +     return RB_EMPTY_ROOT(&mm->free_tree[order]);
>  }
>  
>  static void clear_reset(struct drm_buddy_block *block)
> @@ -70,12 +137,13 @@ static void mark_cleared(struct drm_buddy_block *block)
>       block->header |= DRM_BUDDY_HEADER_CLEAR;
>  }
>  
> -static void mark_allocated(struct drm_buddy_block *block)
> +static void mark_allocated(struct drm_buddy *mm,
> +                        struct drm_buddy_block *block)
>  {
>       block->header &= ~DRM_BUDDY_HEADER_STATE;
>       block->header |= DRM_BUDDY_ALLOCATED;
>  
> -     list_del(&block->link);
> +     rbtree_remove(mm, block);
>  }
>  
>  static void mark_free(struct drm_buddy *mm,
> @@ -84,15 +152,16 @@ static void mark_free(struct drm_buddy *mm,
>       block->header &= ~DRM_BUDDY_HEADER_STATE;
>       block->header |= DRM_BUDDY_FREE;
>  
> -     list_insert_sorted(mm, block);
> +     rbtree_insert(mm, block);
>  }
>  
> -static void mark_split(struct drm_buddy_block *block)
> +static void mark_split(struct drm_buddy *mm,
> +                    struct drm_buddy_block *block)
>  {
>       block->header &= ~DRM_BUDDY_HEADER_STATE;
>       block->header |= DRM_BUDDY_SPLIT;
>  
> -     list_del(&block->link);
> +     rbtree_remove(mm, block);
>  }
>  
>  static inline bool overlaps(u64 s1, u64 e1, u64 s2, u64 e2)
> @@ -148,7 +217,7 @@ static unsigned int __drm_buddy_free(struct drm_buddy *mm,
>                               mark_cleared(parent);
>               }
>  
> -             list_del(&buddy->link);
> +             rbtree_remove(mm, buddy);
>               if (force_merge && drm_buddy_block_is_clear(buddy))
>                       mm->clear_avail -= drm_buddy_block_size(mm, buddy);
>  
> @@ -179,9 +248,11 @@ static int __force_merge(struct drm_buddy *mm,
>               return -EINVAL;
>  
>       for (i = min_order - 1; i >= 0; i--) {
> -             struct drm_buddy_block *block, *prev;
> +             struct drm_buddy_block *block, *prev_block, *first_block;
>  
> -             list_for_each_entry_safe_reverse(block, prev, &mm->free_list[i], link) {
> +             first_block = rb_entry(rb_first(&mm->free_tree[i]), struct drm_buddy_block, rb);
> +
> +             for_each_rb_free_block_reverse_safe(block, prev_block, &mm->free_tree[i], rb) {
>                       struct drm_buddy_block *buddy;
>                       u64 block_start, block_end;
>  
> @@ -206,10 +277,14 @@ static int __force_merge(struct drm_buddy *mm,
>                        * block in the next iteration as we would free the
>                        * buddy block as part of the free function.
>                        */
> -                     if (prev == buddy)
> -                             prev = list_prev_entry(prev, link);
> +                     if (prev_block && prev_block == buddy) {
> +                             if (prev_block != first_block)
> +                                     prev_block = rb_entry(rb_prev(&prev_block->rb),
> +                                                           struct drm_buddy_block,
> +                                                           rb);
> +                     }
>  
> -                     list_del(&block->link);
> +                     rbtree_remove(mm, block);
>                       if (drm_buddy_block_is_clear(block))
>                               mm->clear_avail -= drm_buddy_block_size(mm, 
> block);
>  
> @@ -258,14 +333,14 @@ int drm_buddy_init(struct drm_buddy *mm, u64 size, u64 chunk_size)
>  
>       BUG_ON(mm->max_order > DRM_BUDDY_MAX_ORDER);
>  
> -     mm->free_list = kmalloc_array(mm->max_order + 1,
> -                                   sizeof(struct list_head),
> +     mm->free_tree = kmalloc_array(mm->max_order + 1,
> +                                   sizeof(struct rb_root),
>                                     GFP_KERNEL);
> -     if (!mm->free_list)
> +     if (!mm->free_tree)
>               return -ENOMEM;
>  
>       for (i = 0; i <= mm->max_order; ++i)
> -             INIT_LIST_HEAD(&mm->free_list[i]);
> +             mm->free_tree[i] = RB_ROOT;
>  
>       mm->n_roots = hweight64(size);
>  
> @@ -273,7 +348,7 @@ int drm_buddy_init(struct drm_buddy *mm, u64 size, u64 chunk_size)
>                                 sizeof(struct drm_buddy_block *),
>                                 GFP_KERNEL);
>       if (!mm->roots)
> -             goto out_free_list;
> +             goto out_free_tree;
>  
>       offset = 0;
>       i = 0;
> @@ -312,8 +387,8 @@ int drm_buddy_init(struct drm_buddy *mm, u64 size, u64 chunk_size)
>       while (i--)
>               drm_block_free(mm, mm->roots[i]);
>       kfree(mm->roots);
> -out_free_list:
> -     kfree(mm->free_list);
> +out_free_tree:
> +     kfree(mm->free_tree);
>       return -ENOMEM;
>  }
>  EXPORT_SYMBOL(drm_buddy_init);
> @@ -323,7 +398,7 @@ EXPORT_SYMBOL(drm_buddy_init);
>   *
>   * @mm: DRM buddy manager to free
>   *
> - * Cleanup memory manager resources and the freelist
> + * Cleanup memory manager resources and the freetree
>   */
>  void drm_buddy_fini(struct drm_buddy *mm)
>  {
> @@ -350,7 +425,7 @@ void drm_buddy_fini(struct drm_buddy *mm)
>       WARN_ON(mm->avail != mm->size);
>  
>       kfree(mm->roots);
> -     kfree(mm->free_list);
> +     kfree(mm->free_tree);
>  }
>  EXPORT_SYMBOL(drm_buddy_fini);
>  
> @@ -383,7 +458,7 @@ static int split_block(struct drm_buddy *mm,
>               clear_reset(block);
>       }
>  
> -     mark_split(block);
> +     mark_split(mm, block);
>  
>       return 0;
>  }
> @@ -412,7 +487,7 @@ EXPORT_SYMBOL(drm_get_buddy);
>   * @is_clear: blocks clear state
>   *
>   * Reset the clear state based on @is_clear value for each block
> - * in the freelist.
> + * in the freetree.
>   */
>  void drm_buddy_reset_clear(struct drm_buddy *mm, bool is_clear)
>  {
> @@ -433,7 +508,7 @@ void drm_buddy_reset_clear(struct drm_buddy *mm, bool is_clear)
>       for (i = 0; i <= mm->max_order; ++i) {
>               struct drm_buddy_block *block;
>  
> -             list_for_each_entry_reverse(block, &mm->free_list[i], link) {
> +             for_each_rb_free_block_reverse(block, &mm->free_tree[i], rb) {
>                       if (is_clear != drm_buddy_block_is_clear(block)) {
>                               if (is_clear) {
>                                       mark_cleared(block);
> @@ -641,7 +716,7 @@ get_maxblock(struct drm_buddy *mm, unsigned int order,
>       for (i = order; i <= mm->max_order; ++i) {
>               struct drm_buddy_block *tmp_block;
>  
> -             list_for_each_entry_reverse(tmp_block, &mm->free_list[i], link) {
> +             for_each_rb_free_block_reverse(tmp_block, &mm->free_tree[i], rb) {
>                       if (block_incompatible(tmp_block, flags))
>                               continue;
>  
> @@ -667,7 +742,7 @@ get_maxblock(struct drm_buddy *mm, unsigned int order,
>  }
>  
>  static struct drm_buddy_block *
> -alloc_from_freelist(struct drm_buddy *mm,
> +alloc_from_freetree(struct drm_buddy *mm,
>                   unsigned int order,
>                   unsigned long flags)
>  {
> @@ -684,7 +759,7 @@ alloc_from_freelist(struct drm_buddy *mm,
>               for (tmp = order; tmp <= mm->max_order; ++tmp) {
>                       struct drm_buddy_block *tmp_block;
>  
> -                     list_for_each_entry_reverse(tmp_block, &mm->free_list[tmp], link) {
> +                     for_each_rb_free_block_reverse(tmp_block, &mm->free_tree[tmp], rb) {
>                               if (block_incompatible(tmp_block, flags))
>                                       continue;
>  
> @@ -700,10 +775,8 @@ alloc_from_freelist(struct drm_buddy *mm,
>       if (!block) {
>               /* Fallback method */
>               for (tmp = order; tmp <= mm->max_order; ++tmp) {
> -                     if (!list_empty(&mm->free_list[tmp])) {
> -                             block = list_last_entry(&mm->free_list[tmp],
> -                                                     struct drm_buddy_block,
> -                                                     link);
> +                     if (!rbtree_is_empty(mm, tmp)) {
> +                             block = rbtree_last_entry(mm, tmp);
>                               if (block)
>                                       break;
>                       }
> @@ -771,7 +844,7 @@ static int __alloc_range(struct drm_buddy *mm,
>  
>               if (contains(start, end, block_start, block_end)) {
>                       if (drm_buddy_block_is_free(block)) {
> -                             mark_allocated(block);
> +                             mark_allocated(mm, block);
>                               total_allocated += drm_buddy_block_size(mm, 
> block);
>                               mm->avail -= drm_buddy_block_size(mm, block);
>                               if (drm_buddy_block_is_clear(block))
> @@ -849,7 +922,6 @@ static int __alloc_contig_try_harder(struct drm_buddy *mm,
>  {
>       u64 rhs_offset, lhs_offset, lhs_size, filled;
>       struct drm_buddy_block *block;
> -     struct list_head *list;
>       LIST_HEAD(blocks_lhs);
>       unsigned long pages;
>       unsigned int order;
> @@ -862,11 +934,10 @@ static int __alloc_contig_try_harder(struct drm_buddy *mm,
>       if (order == 0)
>               return -ENOSPC;
>  
> -     list = &mm->free_list[order];
> -     if (list_empty(list))
> +     if (rbtree_is_empty(mm, order))
>               return -ENOSPC;
>  
> -     list_for_each_entry_reverse(block, list, link) {
> +     for_each_rb_free_block_reverse(block, &mm->free_tree[order], rb) {
>               /* Allocate blocks traversing RHS */
>               rhs_offset = drm_buddy_block_offset(block);
>               err =  __drm_buddy_alloc_range(mm, rhs_offset, size,
> @@ -976,7 +1047,7 @@ int drm_buddy_block_trim(struct drm_buddy *mm,
>       list_add(&block->tmp_link, &dfs);
>       err =  __alloc_range(mm, &dfs, new_start, new_size, blocks, NULL);
>       if (err) {
> -             mark_allocated(block);
> +             mark_allocated(mm, block);
>               mm->avail -= drm_buddy_block_size(mm, block);
>               if (drm_buddy_block_is_clear(block))
>                       mm->clear_avail -= drm_buddy_block_size(mm, block);
> @@ -999,8 +1070,8 @@ __drm_buddy_alloc_blocks(struct drm_buddy *mm,
>               return  __drm_buddy_alloc_range_bias(mm, start, end,
>                                                    order, flags);
>       else
> -             /* Allocate from freelist */
> -             return alloc_from_freelist(mm, order, flags);
> +             /* Allocate from freetree */
> +             return alloc_from_freetree(mm, order, flags);
>  }
>  
>  /**
> @@ -1017,8 +1088,8 @@ __drm_buddy_alloc_blocks(struct drm_buddy *mm,
>   * alloc_range_bias() called on range limitations, which traverses
>   * the tree and returns the desired block.
>   *
> - * alloc_from_freelist() called when *no* range restrictions
> - * are enforced, which picks the block from the freelist.
> + * alloc_from_freetree() called when *no* range restrictions
> + * are enforced, which picks the block from the freetree.
>   *
>   * Returns:
>   * 0 on success, error code on failure.
> @@ -1120,7 +1191,7 @@ int drm_buddy_alloc_blocks(struct drm_buddy *mm,
>                       }
>               } while (1);
>  
> -             mark_allocated(block);
> +             mark_allocated(mm, block);
>               mm->avail -= drm_buddy_block_size(mm, block);
>               if (drm_buddy_block_is_clear(block))
>                       mm->clear_avail -= drm_buddy_block_size(mm, block);
> @@ -1204,7 +1275,7 @@ void drm_buddy_print(struct drm_buddy *mm, struct drm_printer *p)
>               struct drm_buddy_block *block;
>               u64 count = 0, free;
>  
> -             list_for_each_entry(block, &mm->free_list[order], link) {
> +             for_each_rb_free_block(block, &mm->free_tree[order], rb) {
>                       BUG_ON(!drm_buddy_block_is_free(block));
>                       count++;
>               }
> diff --git a/include/drm/drm_buddy.h b/include/drm/drm_buddy.h
> index 513837632b7d..091823592034 100644
> --- a/include/drm/drm_buddy.h
> +++ b/include/drm/drm_buddy.h
> @@ -10,6 +10,7 @@
>  #include <linux/list.h>
>  #include <linux/slab.h>
>  #include <linux/sched.h>
> +#include <linux/rbtree.h>
>  
>  #include <drm/drm_print.h>
>  
> @@ -53,7 +54,11 @@ struct drm_buddy_block {
>        * a list, if so desired. As soon as the block is freed with
>        * drm_buddy_free* ownership is given back to the mm.
>        */
> -     struct list_head link;
> +     union {
> +             struct rb_node rb;
> +             struct list_head link;
> +     };
> +
>       struct list_head tmp_link;
>  };
>  
> @@ -68,7 +73,7 @@ struct drm_buddy_block {
>   */
>  struct drm_buddy {
>       /* Maintain a free list for each order. */
> -     struct list_head *free_list;
> +     struct rb_root *free_tree;
>  
>       /*
>        * Maintain explicit binary tree(s) to track the allocation of the
>
> base-commit: f4c75f975cf50fa2e1fd96c5aafe5aa62e55fbe4

-- 
Jani Nikula, Intel