Re: drm(4) memory allocation diff

2021-01-01 Thread Mark Kettenis
> Date: Fri, 1 Jan 2021 20:10:45 +1100
> From: Jonathan Gray 

New diff at the end of this mail.  Please test this one instead of the
previous one.

> On Thu, Dec 31, 2020 at 10:09:36PM +0100, Mark Kettenis wrote:
> > The diff below changes the emulated Linux memory allocation functions
> > a bit such that they only use malloc(9) for allocations smaller than a
> > page.  This reduces pressure on the "interrupt safe" map and hopefully
> > will avoid the
> > 
> > uvm_mapent_alloc: out of static map entries
> > 
> > messages that some people have seen more often in the last few months.
> > It also could help with some memory allocation issues seen by
> > amdgpu(4) users.
> > 
> > The downside of this approach is that memory leaks will be harder to
> > spot as the larger memory allocations are no longer included in the
> > DRM type as reported by vmstat -m.  Another downside is that this no
> > longer caps the amount of kernel memory that can be allocated by
> > drm(4).  If that turns out to be a problem, we can impose a limit on
> > the amount of memory that can be allocated this way.
> > 
> > The implementation needs to keep track of the size of each allocated
> > memory block.  This is done using a red-black tree.  Our kernel uses
> > red-black trees in similar situations but I wouldn't call myself an
> > expert in the area of data structures so there may be a better
> > approach.
> > 
> > Some real-life testing would be appreciated.
> > 
> 
> Thanks for looking into this, I'm interested to hear how people go with
> it.  Some comments inline below.
> 
> > 
> > Index: dev/pci/drm/drm_linux.c
> > ===================================================================
> > RCS file: /cvs/src/sys/dev/pci/drm/drm_linux.c,v
> > retrieving revision 1.74
> > diff -u -p -r1.74 drm_linux.c
> > --- dev/pci/drm/drm_linux.c 31 Dec 2020 06:31:55 -0000  1.74
> > +++ dev/pci/drm/drm_linux.c 31 Dec 2020 20:44:38 -0000
> > @@ -430,6 +430,116 @@ dmi_check_system(const struct dmi_system
> > return (num);
> >  }
> >  
> > +struct vmalloc_entry {
> > +   const void  *addr;
> > +   size_t  size;
> > +   RBT_ENTRY(vmalloc_entry) vmalloc_node;
> > +};
> > +
> > +struct pool vmalloc_pool;
> > +RBT_HEAD(vmalloc_tree, vmalloc_entry) vmalloc_tree;
> > +
> > +RBT_PROTOTYPE(vmalloc_tree, vmalloc_entry, vmalloc_node, vmalloc_compare);
> > +
> > +static inline int
> > +vmalloc_compare(const struct vmalloc_entry *a, const struct vmalloc_entry *b)
> > +{
> > +   vaddr_t va = (vaddr_t)a->addr;
> > +   vaddr_t vb = (vaddr_t)b->addr;
> > +
> > +   return va < vb ? -1 : va > vb;
> > +}
> > +
> > +RBT_GENERATE(vmalloc_tree, vmalloc_entry, vmalloc_node, vmalloc_compare);
> > +
> > +bool
> > +is_vmalloc_addr(const void *addr)
> > +{
> > +   struct vmalloc_entry key;
> > +   struct vmalloc_entry *entry;
> > +
> > +   key.addr = addr;
> > +   entry = RBT_FIND(vmalloc_tree, &vmalloc_tree, &key);
> > +   return (entry != NULL);
> > +}
> > +
> > +void *
> > +vmalloc(unsigned long size)
> > +{
> > +   struct vmalloc_entry *entry;
> > +   void *addr;
> > +
> > +   size = roundup(size, PAGE_SIZE);
> 
> this could just be round_page(size)

Sure.

> > +   addr = km_alloc(size, &kv_any, &kp_dirty, &kd_waitok);
> > +   if (addr) {
> > +   entry = pool_get(&vmalloc_pool, PR_WAITOK);
> > +   entry->addr = addr;
> > +   entry->size = size;
> > +   RBT_INSERT(vmalloc_tree, &vmalloc_tree, entry);
> > +   }
> > +
> > +   return addr;
> > +}
> > +
> > +void *
> > +vzalloc(unsigned long size)
> > +{
> > +   struct vmalloc_entry *entry;
> > +   void *addr;
> > +
> > +   size = roundup(size, PAGE_SIZE);
> > +   addr = km_alloc(size, &kv_any, &kp_zero, &kd_waitok);
> > +   if (addr) {
> > +   entry = pool_get(&vmalloc_pool, PR_WAITOK);
> > +   entry->addr = addr;
> > +   entry->size = size;
> > +   RBT_INSERT(vmalloc_tree, &vmalloc_tree, entry);
> > +   }
> > +
> > +   return addr;
> > +}
> > +
> > +void
> > +vfree(const void *addr)
> > +{
> > +   struct vmalloc_entry key;
> > +   struct vmalloc_entry *entry;
> > +
> > +   key.addr = addr;
> > +   entry = RBT_FIND(vmalloc_tree, &vmalloc_tree, &key);
> > +   if (entry == NULL)
> > +   panic("%s: non vmalloced addr %p", __func__, addr);
> > +
> > +   RBT_REMOVE(vmalloc_tree, &vmalloc_tree, entry);
> > +   km_free((void *)addr, entry->size, &kv_any, &kp_dirty);
> > +   pool_put(&vmalloc_pool, entry);
> > +}
> > +
> > +void *
> > +kvmalloc(size_t size, gfp_t flags)
> > +{
> > +   if (flags != GFP_KERNEL || size < PAGE_SIZE)
> > +   return malloc(size, M_DRM, flags);
> > +   return vmalloc(size);
> > +}
> > +
> > +void *
> > +kvzalloc(size_t size, gfp_t flags)
> > +{
> > +   if (flags != GFP_KERNEL || size < PAGE_SIZE)
> > +   return malloc(size, M_DRM, flags | M_ZERO);
> > +   return vzalloc(size);
> > +}
> 
> these should be (flags & GFP_KERNEL) != GFP_KERNEL to handle calls like
> 
> ttm->pages = kvmalloc_array(ttm->num_pages, sizeof(void*),
> GFP_KERNEL | __GFP_ZERO);

Re: drm(4) memory allocation diff

2021-01-01 Thread Mark Kettenis
> Date: Fri, 1 Jan 2021 09:51:27 +0100
> From: Otto Moerbeek 
> 
> On Thu, Dec 31, 2020 at 10:09:36PM +0100, Mark Kettenis wrote:
> 
> > The diff below changes the emulated Linux memory allocation functions
> > a bit such that they only use malloc(9) for allocations smaller than a
> > page.  This reduces pressure on the "interrupt safe" map and hopefully
> > will avoid the
> > 
> > uvm_mapent_alloc: out of static map entries
> > 
> > messages that some people have seen more often in the last few months.
> > It also could help with some memory allocation issues seen by
> > amdgpu(4) users.
> > 
> > The downside of this approach is that memory leaks will be harder to
> > spot as the larger memory allocations are no longer included in the
> > DRM type as reported by vmstat -m.  Another downside is that this no
> > longer caps the amount of kernel memory that can be allocated by
> > drm(4).  If that turns out to be a problem, we can impose a limit on
> > the amount of memory that can be allocated this way.
> > 
> > The implementation needs to keep track of the size of each allocated
> > memory block.  This is done using a red-black tree.  Our kernel uses
> > red-black trees in similar situations but I wouldn't call myself an
> > expert in the area of data structures so there may be a better
> > approach.
> > 
> > Some real-life testing would be appreciated.
> > 
> 
> A hash table (as used in userland) would lead to an amortized
> complexity of O(1) for an insert or delete, while an RB-tree would be
> O(log n) for a tree of size n. The drawback is that a hash table
> is more complex. Basically, whether it's worth the trouble depends
> on the number of inserts and deletes you expect.

I'll probably leave it as-is then unless profiling shows that this is
performance critical.

> A few nits inline,
> 
>   -Otto
> 
> > Index: dev/pci/drm/drm_linux.c
> > ===================================================================
> > RCS file: /cvs/src/sys/dev/pci/drm/drm_linux.c,v
> > retrieving revision 1.74
> > diff -u -p -r1.74 drm_linux.c
> > --- dev/pci/drm/drm_linux.c 31 Dec 2020 06:31:55 -0000  1.74
> > +++ dev/pci/drm/drm_linux.c 31 Dec 2020 20:44:38 -0000
> > @@ -430,6 +430,116 @@ dmi_check_system(const struct dmi_system
> > return (num);
> >  }
> >  
> > +struct vmalloc_entry {
> > +   const void  *addr;
> > +   size_t  size;
> > +   RBT_ENTRY(vmalloc_entry) vmalloc_node;
> > +};
> > +
> > +struct pool vmalloc_pool;
> > +RBT_HEAD(vmalloc_tree, vmalloc_entry) vmalloc_tree;
> > +
> > +RBT_PROTOTYPE(vmalloc_tree, vmalloc_entry, vmalloc_node, vmalloc_compare);
> > +
> > +static inline int
> > +vmalloc_compare(const struct vmalloc_entry *a, const struct vmalloc_entry *b)
> > +{
> > +   vaddr_t va = (vaddr_t)a->addr;
> > +   vaddr_t vb = (vaddr_t)b->addr;
> > +
> > +   return va < vb ? -1 : va > vb;
> > +}
> > +
> > +RBT_GENERATE(vmalloc_tree, vmalloc_entry, vmalloc_node, vmalloc_compare);
> > +
> > +bool
> > +is_vmalloc_addr(const void *addr)
> > +{
> > +   struct vmalloc_entry key;
> > +   struct vmalloc_entry *entry;
> > +
> > +   key.addr = addr;
> > +   entry = RBT_FIND(vmalloc_tree, &vmalloc_tree, &key);
> > +   return (entry != NULL);
> > +}
> > +
> > +void *
> > +vmalloc(unsigned long size)
> 
> 
> Why sometimes unsigned long and sometimes size_t for sizes?

Because that's the way things are in Linux.

> > +{
> > +   struct vmalloc_entry *entry;
> > +   void *addr;
> > +
> > +   size = roundup(size, PAGE_SIZE);
> > +   addr = km_alloc(size, &kv_any, &kp_dirty, &kd_waitok);
> > +   if (addr) {
> > +   entry = pool_get(&vmalloc_pool, PR_WAITOK);
> > +   entry->addr = addr;
> > +   entry->size = size;
> > +   RBT_INSERT(vmalloc_tree, &vmalloc_tree, entry);
> > +   }
> > +
> > +   return addr;
> > +}
> > +
> > +void *
> > +vzalloc(unsigned long size)
> > +{
> > +   struct vmalloc_entry *entry;
> > +   void *addr;
> > +
> > +   size = roundup(size, PAGE_SIZE);
> > +   addr = km_alloc(size, &kv_any, &kp_zero, &kd_waitok);
> > +   if (addr) {
> > +   entry = pool_get(&vmalloc_pool, PR_WAITOK);
> > +   entry->addr = addr;
> > +   entry->size = size;
> > +   RBT_INSERT(vmalloc_tree, &vmalloc_tree, entry);
> > +   }
> > +
> > +   return addr;
> > +}
> > +
> > +void
> > +vfree(const void *addr)
> > +{
> > +   struct vmalloc_entry key;
> > +   struct vmalloc_entry *entry;
> > +
> > +   key.addr = addr;
> > +   entry = RBT_FIND(vmalloc_tree, &vmalloc_tree, &key);
> > +   if (entry == NULL)
> > +   panic("%s: non vmalloced addr %p", __func__, addr);
> > +
> > +   RBT_REMOVE(vmalloc_tree, &vmalloc_tree, entry);
> > +   km_free((void *)addr, entry->size, &kv_any, &kp_dirty);
> > +   pool_put(&vmalloc_pool, entry);
> > +}
> > +
> > +void *
> > +kvmalloc(size_t size, gfp_t flags)
> > +{
> > +   if (flags != GFP_KERNEL || size < PAGE_SIZE)
> > +   return malloc(size, M_DRM, flags);
> > +   return vmalloc(size);
> > +}
> > +
>

Re: drm(4) memory allocation diff

2021-01-01 Thread Jonathan Gray
On Thu, Dec 31, 2020 at 10:09:36PM +0100, Mark Kettenis wrote:
> The diff below changes the emulated Linux memory allocation functions
> a bit such that they only use malloc(9) for allocations smaller than a
> page.  This reduces pressure on the "interrupt safe" map and hopefully
> will avoid the
> 
> uvm_mapent_alloc: out of static map entries
> 
> messages that some people have seen more often in the last few months.
> It also could help with some memory allocation issues seen by
> amdgpu(4) users.
> 
> The downside of this approach is that memory leaks will be harder to
> spot as the larger memory allocations are no longer included in the
> DRM type as reported by vmstat -m.  Another downside is that this no
> longer caps the amount of kernel memory that can be allocated by
> drm(4).  If that turns out to be a problem, we can impose a limit on
> the amount of memory that can be allocated this way.
> 
> The implementation needs to keep track of the size of each allocated
> memory block.  This is done using a red-black tree.  Our kernel uses
> red-black trees in similar situations but I wouldn't call myself an
> expert in the area of data structures so there may be a better
> approach.
> 
> Some real-life testing would be appreciated.
> 

Thanks for looking into this, I'm interested to hear how people go with
it.  Some comments inline below.

> 
> Index: dev/pci/drm/drm_linux.c
> ===================================================================
> RCS file: /cvs/src/sys/dev/pci/drm/drm_linux.c,v
> retrieving revision 1.74
> diff -u -p -r1.74 drm_linux.c
> --- dev/pci/drm/drm_linux.c   31 Dec 2020 06:31:55 -0000  1.74
> +++ dev/pci/drm/drm_linux.c   31 Dec 2020 20:44:38 -0000
> @@ -430,6 +430,116 @@ dmi_check_system(const struct dmi_system
>   return (num);
>  }
>  
> +struct vmalloc_entry {
> + const void  *addr;
> + size_t  size;
> + RBT_ENTRY(vmalloc_entry) vmalloc_node;
> +};
> +
> +struct pool vmalloc_pool;
> +RBT_HEAD(vmalloc_tree, vmalloc_entry) vmalloc_tree;
> +
> +RBT_PROTOTYPE(vmalloc_tree, vmalloc_entry, vmalloc_node, vmalloc_compare);
> +
> +static inline int
> +vmalloc_compare(const struct vmalloc_entry *a, const struct vmalloc_entry *b)
> +{
> + vaddr_t va = (vaddr_t)a->addr;
> + vaddr_t vb = (vaddr_t)b->addr;
> +
> + return va < vb ? -1 : va > vb;
> +}
> +
> +RBT_GENERATE(vmalloc_tree, vmalloc_entry, vmalloc_node, vmalloc_compare);
> +
> +bool
> +is_vmalloc_addr(const void *addr)
> +{
> + struct vmalloc_entry key;
> + struct vmalloc_entry *entry;
> +
> + key.addr = addr;
> + entry = RBT_FIND(vmalloc_tree, &vmalloc_tree, &key);
> + return (entry != NULL);
> +}
> +
> +void *
> +vmalloc(unsigned long size)
> +{
> + struct vmalloc_entry *entry;
> + void *addr;
> +
> + size = roundup(size, PAGE_SIZE);

this could just be round_page(size)

> + addr = km_alloc(size, &kv_any, &kp_dirty, &kd_waitok);
> + if (addr) {
> + entry = pool_get(&vmalloc_pool, PR_WAITOK);
> + entry->addr = addr;
> + entry->size = size;
> + RBT_INSERT(vmalloc_tree, &vmalloc_tree, entry);
> + }
> +
> + return addr;
> +}
> +
> +void *
> +vzalloc(unsigned long size)
> +{
> + struct vmalloc_entry *entry;
> + void *addr;
> +
> + size = roundup(size, PAGE_SIZE);
> + addr = km_alloc(size, &kv_any, &kp_zero, &kd_waitok);
> + if (addr) {
> + entry = pool_get(&vmalloc_pool, PR_WAITOK);
> + entry->addr = addr;
> + entry->size = size;
> + RBT_INSERT(vmalloc_tree, &vmalloc_tree, entry);
> + }
> +
> + return addr;
> +}
> +
> +void
> +vfree(const void *addr)
> +{
> + struct vmalloc_entry key;
> + struct vmalloc_entry *entry;
> +
> + key.addr = addr;
> + entry = RBT_FIND(vmalloc_tree, &vmalloc_tree, &key);
> + if (entry == NULL)
> + panic("%s: non vmalloced addr %p", __func__, addr);
> +
> + RBT_REMOVE(vmalloc_tree, &vmalloc_tree, entry);
> + km_free((void *)addr, entry->size, &kv_any, &kp_dirty);
> + pool_put(&vmalloc_pool, entry);
> +}
> +
> +void *
> +kvmalloc(size_t size, gfp_t flags)
> +{
> + if (flags != GFP_KERNEL || size < PAGE_SIZE)
> + return malloc(size, M_DRM, flags);
> + return vmalloc(size);
> +}
> +
> +void *
> +kvzalloc(size_t size, gfp_t flags)
> +{
> + if (flags != GFP_KERNEL || size < PAGE_SIZE)
> + return malloc(size, M_DRM, flags | M_ZERO);
> + return vzalloc(size);
> +}

these should be (flags & GFP_KERNEL) != GFP_KERNEL to handle calls like

ttm->pages = kvmalloc_array(ttm->num_pages, sizeof(void*),
GFP_KERNEL | __GFP_ZERO);
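
For illustration, an untested sketch of that change (not part of the
posted diff):

	void *
	kvmalloc(size_t size, gfp_t flags)
	{
		/*
		 * GFP_KERNEL is itself a combination of bits, so test
		 * that all of them are present instead of comparing for
		 * equality; modifier bits like __GFP_ZERO then no longer
		 * force the malloc(9) path for large allocations.
		 */
		if ((flags & GFP_KERNEL) != GFP_KERNEL || size < PAGE_SIZE)
			return malloc(size, M_DRM, flags);
		return vmalloc(size);
	}

kvzalloc() would get the same test, keeping M_ZERO on the malloc(9)
path.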

> +
> +void
> +kvfree(const void *addr)
> +{
> + if (is_vmalloc_addr(addr))
> + vfree(addr);
> + else
> + free((void *)addr, M_DRM, 0);
> +}
> +
>  struct vm_page *
>  alloc_pages(unsigned int gfp_mask, unsigned int order)
>  {
> @@ -1939,6 +2049,10 @@ drm_linux_init(void)

Re: drm(4) memory allocation diff

2021-01-01 Thread Otto Moerbeek
On Thu, Dec 31, 2020 at 10:09:36PM +0100, Mark Kettenis wrote:

> The diff below changes the emulated Linux memory allocation functions
> a bit such that they only use malloc(9) for allocations smaller than a
> page.  This reduces pressure on the "interrupt safe" map and hopefully
> will avoid the
> 
> uvm_mapent_alloc: out of static map entries
> 
> messages that some people have seen more often in the last few months.
> It also could help with some memory allocation issues seen by
> amdgpu(4) users.
> 
> The downside of this approach is that memory leaks will be harder to
> spot as the larger memory allocations are no longer included in the
> DRM type as reported by vmstat -m.  Another downside is that this no
> longer caps the amount of kernel memory that can be allocated by
> drm(4).  If that turns out to be a problem, we can impose a limit on
> the amount of memory that can be allocated this way.
> 
> The implementation needs to keep track of the size of each allocated
> memory block.  This is done using a red-black tree.  Our kernel uses
> red-black trees in similar situations but I wouldn't call myself an
> expert in the area of data structures so there may be a better
> approach.
> 
> Some real-life testing would be appreciated.
> 

A hash table (as used in userland) would lead to an amortized
complexity of O(1) for an insert or delete, while an RB-tree would be
O(log n) for a tree of size n. The drawback is that a hash table
is more complex. Basically, whether it's worth the trouble depends
on the number of inserts and deletes you expect.
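
For illustration only, not code from this thread: a minimal chained
hash could key on the allocation address, with vmalloc_entry carrying
a LIST_ENTRY instead of an RBT_ENTRY; the bucket count and hash below
are arbitrary assumptions:

	#define VMALLOC_HASH_BUCKETS	64

	LIST_HEAD(vmalloc_list, vmalloc_entry)
	    vmalloc_hash[VMALLOC_HASH_BUCKETS];

	/* Pick a bucket from the page-aligned address bits. */
	static struct vmalloc_list *
	vmalloc_bucket(const void *addr)
	{
		u_long h = (u_long)addr >> PAGE_SHIFT;
		return &vmalloc_hash[h % VMALLOC_HASH_BUCKETS];
	}

is_vmalloc_addr() and vfree() would then walk the bucket's list with
LIST_FOREACH(), comparing entry->addr.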

A few nits inline,

-Otto

> Index: dev/pci/drm/drm_linux.c
> ===================================================================
> RCS file: /cvs/src/sys/dev/pci/drm/drm_linux.c,v
> retrieving revision 1.74
> diff -u -p -r1.74 drm_linux.c
> --- dev/pci/drm/drm_linux.c   31 Dec 2020 06:31:55 -0000  1.74
> +++ dev/pci/drm/drm_linux.c   31 Dec 2020 20:44:38 -0000
> @@ -430,6 +430,116 @@ dmi_check_system(const struct dmi_system
>   return (num);
>  }
>  
> +struct vmalloc_entry {
> + const void  *addr;
> + size_t  size;
> + RBT_ENTRY(vmalloc_entry) vmalloc_node;
> +};
> +
> +struct pool vmalloc_pool;
> +RBT_HEAD(vmalloc_tree, vmalloc_entry) vmalloc_tree;
> +
> +RBT_PROTOTYPE(vmalloc_tree, vmalloc_entry, vmalloc_node, vmalloc_compare);
> +
> +static inline int
> +vmalloc_compare(const struct vmalloc_entry *a, const struct vmalloc_entry *b)
> +{
> + vaddr_t va = (vaddr_t)a->addr;
> + vaddr_t vb = (vaddr_t)b->addr;
> +
> + return va < vb ? -1 : va > vb;
> +}
> +
> +RBT_GENERATE(vmalloc_tree, vmalloc_entry, vmalloc_node, vmalloc_compare);
> +
> +bool
> +is_vmalloc_addr(const void *addr)
> +{
> + struct vmalloc_entry key;
> + struct vmalloc_entry *entry;
> +
> + key.addr = addr;
> + entry = RBT_FIND(vmalloc_tree, &vmalloc_tree, &key);
> + return (entry != NULL);
> +}
> +
> +void *
> +vmalloc(unsigned long size)


Why sometimes unsigned long and sometimes size_t for sizes?

> +{
> + struct vmalloc_entry *entry;
> + void *addr;
> +
> + size = roundup(size, PAGE_SIZE);
> + addr = km_alloc(size, &kv_any, &kp_dirty, &kd_waitok);
> + if (addr) {
> + entry = pool_get(&vmalloc_pool, PR_WAITOK);
> + entry->addr = addr;
> + entry->size = size;
> + RBT_INSERT(vmalloc_tree, &vmalloc_tree, entry);
> + }
> +
> + return addr;
> +}
> +
> +void *
> +vzalloc(unsigned long size)
> +{
> + struct vmalloc_entry *entry;
> + void *addr;
> +
> + size = roundup(size, PAGE_SIZE);
> + addr = km_alloc(size, &kv_any, &kp_zero, &kd_waitok);
> + if (addr) {
> + entry = pool_get(&vmalloc_pool, PR_WAITOK);
> + entry->addr = addr;
> + entry->size = size;
> + RBT_INSERT(vmalloc_tree, &vmalloc_tree, entry);
> + }
> +
> + return addr;
> +}
> +
> +void
> +vfree(const void *addr)
> +{
> + struct vmalloc_entry key;
> + struct vmalloc_entry *entry;
> +
> + key.addr = addr;
> + entry = RBT_FIND(vmalloc_tree, &vmalloc_tree, &key);
> + if (entry == NULL)
> + panic("%s: non vmalloced addr %p", __func__, addr);
> +
> + RBT_REMOVE(vmalloc_tree, &vmalloc_tree, entry);
> + km_free((void *)addr, entry->size, &kv_any, &kp_dirty);
> + pool_put(&vmalloc_pool, entry);
> +}
> +
> +void *
> +kvmalloc(size_t size, gfp_t flags)
> +{
> + if (flags != GFP_KERNEL || size < PAGE_SIZE)
> + return malloc(size, M_DRM, flags);
> + return vmalloc(size);
> +}
> +
> +void *
> +kvzalloc(size_t size, gfp_t flags)
> +{
> + if (flags != GFP_KERNEL || size < PAGE_SIZE)
> + return malloc(size, M_DRM, flags | M_ZERO);
> + return vzalloc(size);
> +}
> +
> +void
> +kvfree(const void *addr)
> +{
> + if (is_vmalloc_addr(addr))
> + vfree(addr);
> + else
> + free((void *)addr, M_DRM, 0);
> +}
> +
>  struct vm_page *
>  

drm(4) memory allocation diff

2020-12-31 Thread Mark Kettenis
The diff below changes the emulated Linux memory allocation functions
a bit such that they only use malloc(9) for allocations smaller than a
page.  This reduces pressure on the "interrupt safe" map and hopefully
will avoid the

uvm_mapent_alloc: out of static map entries

messages that some people have seen more often in the last few months.
It also could help with some memory allocation issues seen by
amdgpu(4) users.

The downside of this approach is that memory leaks will be harder to
spot as the larger memory allocations are no longer included in the
DRM type as reported by vmstat -m.  Another downside is that this no
longer caps the amount of kernel memory that can be allocated by
drm(4).  If that turns out to be a problem, we can impose a limit on
the amount of memory that can be allocated this way.
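
For illustration only, not part of the diff below: such a limit could
be a simple byte counter checked in vmalloc() and vzalloc(), with the
cap value here being an arbitrary assumption:

	/* Hypothetical cap on km_alloc(9)-backed allocations. */
	#define VMALLOC_MAX	(256UL * 1024 * 1024)
	unsigned long vmalloc_used;

	/* in vmalloc()/vzalloc(), before km_alloc(): */
	if (vmalloc_used + size > VMALLOC_MAX)
		return NULL;
	/* ...after a successful allocation: */
	vmalloc_used += size;
	/* and in vfree(), after km_free(): */
	vmalloc_used -= entry->size;

The counter updates would need a lock or atomic operations.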

The implementation needs to keep track of the size of each allocated
memory block.  This is done using a red-black tree.  Our kernel uses
red-black trees in similar situations but I wouldn't call myself an
expert in the area of data structures so there may be a better
approach.

Some real-life testing would be appreciated.


Index: dev/pci/drm/drm_linux.c
===================================================================
RCS file: /cvs/src/sys/dev/pci/drm/drm_linux.c,v
retrieving revision 1.74
diff -u -p -r1.74 drm_linux.c
--- dev/pci/drm/drm_linux.c 31 Dec 2020 06:31:55 -0000  1.74
+++ dev/pci/drm/drm_linux.c 31 Dec 2020 20:44:38 -0000
@@ -430,6 +430,116 @@ dmi_check_system(const struct dmi_system
return (num);
 }
 
+struct vmalloc_entry {
+   const void  *addr;
+   size_t  size;
+   RBT_ENTRY(vmalloc_entry) vmalloc_node;
+};
+
+struct pool vmalloc_pool;
+RBT_HEAD(vmalloc_tree, vmalloc_entry) vmalloc_tree;
+
+RBT_PROTOTYPE(vmalloc_tree, vmalloc_entry, vmalloc_node, vmalloc_compare);
+
+static inline int
+vmalloc_compare(const struct vmalloc_entry *a, const struct vmalloc_entry *b)
+{
+   vaddr_t va = (vaddr_t)a->addr;
+   vaddr_t vb = (vaddr_t)b->addr;
+
+   return va < vb ? -1 : va > vb;
+}
+
+RBT_GENERATE(vmalloc_tree, vmalloc_entry, vmalloc_node, vmalloc_compare);
+
+bool
+is_vmalloc_addr(const void *addr)
+{
+   struct vmalloc_entry key;
+   struct vmalloc_entry *entry;
+
+   key.addr = addr;
+   entry = RBT_FIND(vmalloc_tree, &vmalloc_tree, &key);
+   return (entry != NULL);
+}
+
+void *
+vmalloc(unsigned long size)
+{
+   struct vmalloc_entry *entry;
+   void *addr;
+
+   size = roundup(size, PAGE_SIZE);
+   addr = km_alloc(size, &kv_any, &kp_dirty, &kd_waitok);
+   if (addr) {
+   entry = pool_get(&vmalloc_pool, PR_WAITOK);
+   entry->addr = addr;
+   entry->size = size;
+   RBT_INSERT(vmalloc_tree, &vmalloc_tree, entry);
+   }
+
+   return addr;
+}
+
+void *
+vzalloc(unsigned long size)
+{
+   struct vmalloc_entry *entry;
+   void *addr;
+
+   size = roundup(size, PAGE_SIZE);
+   addr = km_alloc(size, &kv_any, &kp_zero, &kd_waitok);
+   if (addr) {
+   entry = pool_get(&vmalloc_pool, PR_WAITOK);
+   entry->addr = addr;
+   entry->size = size;
+   RBT_INSERT(vmalloc_tree, &vmalloc_tree, entry);
+   }
+
+   return addr;
+}
+
+void
+vfree(const void *addr)
+{
+   struct vmalloc_entry key;
+   struct vmalloc_entry *entry;
+
+   key.addr = addr;
+   entry = RBT_FIND(vmalloc_tree, &vmalloc_tree, &key);
+   if (entry == NULL)
+   panic("%s: non vmalloced addr %p", __func__, addr);
+
+   RBT_REMOVE(vmalloc_tree, &vmalloc_tree, entry);
+   km_free((void *)addr, entry->size, &kv_any, &kp_dirty);
+   pool_put(&vmalloc_pool, entry);
+}
+
+void *
+kvmalloc(size_t size, gfp_t flags)
+{
+   if (flags != GFP_KERNEL || size < PAGE_SIZE)
+   return malloc(size, M_DRM, flags);
+   return vmalloc(size);
+}
+
+void *
+kvzalloc(size_t size, gfp_t flags)
+{
+   if (flags != GFP_KERNEL || size < PAGE_SIZE)
+   return malloc(size, M_DRM, flags | M_ZERO);
+   return vzalloc(size);
+}
+
+void
+kvfree(const void *addr)
+{
+   if (is_vmalloc_addr(addr))
+   vfree(addr);
+   else
+   free((void *)addr, M_DRM, 0);
+}
+
 struct vm_page *
 alloc_pages(unsigned int gfp_mask, unsigned int order)
 {
@@ -1939,6 +2049,10 @@ drm_linux_init(void)
 
pool_init(&idr_pool, sizeof(struct idr_entry), 0, IPL_TTY, 0,
"idrpl", NULL);
+
+   pool_init(&vmalloc_pool, sizeof(struct vmalloc_entry), 0, IPL_NONE, 0,
+   "vmallocpl", NULL);
+   RBT_INIT(vmalloc_tree, &vmalloc_tree);
 }
 
 void
Index: dev/pci/drm/include/linux/mm.h
===================================================================
RCS file: /cvs/src/sys/dev/pci/drm/include/linux/mm.h,v
retrieving revision 1.6
diff -u -p -r1.6 mm.h
--- dev/pci/drm/include/linux/mm.h  7 Dec 2020 09:10:42 -0000   1.6