> Date: Fri, 1 Jan 2021 09:51:27 +0100
> From: Otto Moerbeek <[email protected]>
>
> On Thu, Dec 31, 2020 at 10:09:36PM +0100, Mark Kettenis wrote:
>
> > The diff below changes the emulated Linux memory allocation functions
> > a bit such that they only use malloc(9) for allocations smaller than a
> > page. This reduces pressure on the "interrupt safe" map and hopefully
> > will avoid the
> >
> > uvm_mapent_alloc: out of static map entries
> >
> > messages that some people have seen more often in the last few months.
> > It also could help with some memory allocation issues seem by
> > amdgpu(4) users.
> >
> > The downside of this approach is that memory leaks will be harder to
> > spot as the larger memory allocations are no longer included in the
> > DRM type as reported by vmstat -m. Another downside is that this no
> > longer caps the amount of kernel memory that can be allocated by
> > drm(4). If that turns out to be a problem, we can impose a limit on
> > the amount of memory that can be allocated this way.
> >
> > The implementation needs to keep track of the size of each allocated
> > memory block. This is done using a red-black tree. Our kernel uses
> > red-black trees in similar situations but I wouldn't call myself an
> > expert in the area of data structures so a there may be a better
> > approach.
> >
> > Some real-life testing would be appreciated.
> >
>
> A hash table (as used in userland) would lead to an amortized
> complexity of O(1) for an insert or delete, while RB-tree would be
> O(log n) for a tree of size n. The drawbakc is that a hash table
> is more complex. Basically it depend on the amount of insert and
> deletes you expect if it's worth the trouble.
I'll probably leave it as-is then unless profiling shows that this is
performance critical.
> A few nits inline,
>
> -Otto
>
> > Index: dev/pci/drm/drm_linux.c
> > ===================================================================
> > RCS file: /cvs/src/sys/dev/pci/drm/drm_linux.c,v
> > retrieving revision 1.74
> > diff -u -p -r1.74 drm_linux.c
> > --- dev/pci/drm/drm_linux.c 31 Dec 2020 06:31:55 -0000 1.74
> > +++ dev/pci/drm/drm_linux.c 31 Dec 2020 20:44:38 -0000
> > @@ -430,6 +430,116 @@ dmi_check_system(const struct dmi_system
> > return (num);
> > }
> >
> > +struct vmalloc_entry {
> > + const void *addr;
> > + size_t size;
> > + RBT_ENTRY(vmalloc_entry) vmalloc_node;
> > +};
> > +
> > +struct pool vmalloc_pool;
> > +RBT_HEAD(vmalloc_tree, vmalloc_entry) vmalloc_tree;
> > +
> > +RBT_PROTOTYPE(vmalloc_tree, vmalloc_entry, vmalloc_node, vmalloc_compare);
> > +
> > +static inline int
> > +vmalloc_compare(const struct vmalloc_entry *a, const struct vmalloc_entry
> > *b)
> > +{
> > + vaddr_t va = (vaddr_t)a->addr;
> > + vaddr_t vb = (vaddr_t)b->addr;
> > +
> > + return va < vb ? -1 : va > vb;
> > +}
> > +
> > +RBT_GENERATE(vmalloc_tree, vmalloc_entry, vmalloc_node, vmalloc_compare);
> > +
> > +bool
> > +is_vmalloc_addr(const void *addr)
> > +{
> > + struct vmalloc_entry key;
> > + struct vmalloc_entry *entry;
> > +
> > + key.addr = addr;
> > + entry = RBT_FIND(vmalloc_tree, &vmalloc_tree, &key);
> > + return (entry != NULL);
> > +}
> > +
> > +void *
> > +vmalloc(unsigned long size)
>
>
> Why sometimes unsigned long and sometimes size_t for sizes?
Because that's the way things are in Linux.
> > +{
> > + struct vmalloc_entry *entry;
> > + void *addr;
> > +
> > + size = roundup(size, PAGE_SIZE);
> > + addr = km_alloc(size, &kv_any, &kp_dirty, &kd_waitok);
> > + if (addr) {
> > + entry = pool_get(&vmalloc_pool, PR_WAITOK);
> > + entry->addr = addr;
> > + entry->size = size;
> > + RBT_INSERT(vmalloc_tree, &vmalloc_tree, entry);
> > + }
> > +
> > + return addr;
> > +}
> > +
> > +void *
> > +vzalloc(unsigned long size)
> > +{
> > + struct vmalloc_entry *entry;
> > + void *addr;
> > +
> > + size = roundup(size, PAGE_SIZE);
> > + addr = km_alloc(size, &kv_any, &kp_zero, &kd_waitok);
> > + if (addr) {
> > + entry = pool_get(&vmalloc_pool, PR_WAITOK);
> > + entry->addr = addr;
> > + entry->size = size;
> > + RBT_INSERT(vmalloc_tree, &vmalloc_tree, entry);
> > + }
> > +
> > + return addr;
> > +}
> > +
> > +void
> > +vfree(const void *addr)
> > +{
> > + struct vmalloc_entry key;
> > + struct vmalloc_entry *entry;
> > +
> > + key.addr = addr;
> > + entry = RBT_FIND(vmalloc_tree, &vmalloc_tree, &key);
> > + if (entry == NULL)
> > + panic("%s: non vmalloced addr %p", __func__, addr);
> > +
> > + RBT_REMOVE(vmalloc_tree, &vmalloc_tree, entry);
> > + km_free((void *)addr, entry->size, &kv_any, &kp_dirty);
> > + pool_put(&vmalloc_pool, entry);
> > +}
> > +
> > +void *
> > +kvmalloc(size_t size, gfp_t flags)
> > +{
> > + if (flags != GFP_KERNEL || size < PAGE_SIZE)
> > + return malloc(size, M_DRM, flags);
> > + return vmalloc(size);
> > +}
> > +
> > +void *
> > +kvzalloc(size_t size, gfp_t flags)
> > +{
> > + if (flags != GFP_KERNEL || size < PAGE_SIZE)
> > + return malloc(size, M_DRM, flags | M_ZERO);
> > + return vzalloc(size);
> > +}
> > +
> > +void
> > +kvfree(const void *addr)
> > +{
> > + if (is_vmalloc_addr(addr))
> > + vfree(addr);
> > + else
> > + free((void *)addr, M_DRM, 0);
> > +}
> > +
> > struct vm_page *
> > alloc_pages(unsigned int gfp_mask, unsigned int order)
> > {
> > @@ -1939,6 +2049,10 @@ drm_linux_init(void)
> >
> > pool_init(&idr_pool, sizeof(struct idr_entry), 0, IPL_TTY, 0,
> > "idrpl", NULL);
> > +
> > + pool_init(&vmalloc_pool, sizeof(struct vmalloc_entry), 0, IPL_NONE, 0,
> > + "vmallocpl", NULL);
> > + RBT_INIT(vmalloc_tree, &vmalloc_tree);
> > }
> >
> > void
> > Index: dev/pci/drm/include/linux/mm.h
> > ===================================================================
> > RCS file: /cvs/src/sys/dev/pci/drm/include/linux/mm.h,v
> > retrieving revision 1.6
> > diff -u -p -r1.6 mm.h
> > --- dev/pci/drm/include/linux/mm.h 7 Dec 2020 09:10:42 -0000 1.6
> > +++ dev/pci/drm/include/linux/mm.h 31 Dec 2020 20:44:38 -0000
> > @@ -30,38 +30,26 @@
> > #define PFN_DOWN(x) ((x) >> PAGE_SHIFT)
> > #define PFN_PHYS(x) ((x) << PAGE_SHIFT)
> >
> > -#define is_vmalloc_addr(ptr) true
> > +bool is_vmalloc_addr(const void *);
> >
> > -static inline void *
> > -kvmalloc(size_t size, gfp_t flags)
> > -{
> > - return malloc(size, M_DRM, flags);
> > -}
> > +void *kvmalloc(size_t, gfp_t);
> > +void *kvzalloc(size_t, gfp_t);
> > +void kvfree(const void *);
> >
> > static inline void *
> > -kvmalloc_array(size_t n, size_t size, int flags)
> > +kvmalloc_array(size_t n, size_t size, gfp_t flags)
> > {
> > if (n != 0 && SIZE_MAX / n < size)
>
> You might want to avoid the / in almost all cases and use the
> construct from userland reallocaarry.c
>
>
> > return NULL;
> > - return malloc(n * size, M_DRM, flags);
> > -}
> > -
> > -static inline void *
> > -kvcalloc(size_t n, size_t size, int flags)
> > -{
> > - return kvmalloc_array(n, size, flags | M_ZERO);
> > + return kvmalloc(n * size, flags);
> > }
> >
> > static inline void *
> > -kvzalloc(size_t size, int flags)
> > -{
> > - return malloc(size, M_DRM, flags | M_ZERO);
> > -}
> > -
> > -static inline void
> > -kvfree(const void *objp)
> > +kvcalloc(size_t n, size_t size, gfp_t flags)
> > {
> > - free((void *)objp, M_DRM, 0);
> > + if (n != 0 && SIZE_MAX / n < size)
> > + return NULL;
> > + return kvzalloc(n * size, flags);
> > }
> >
> > static inline long
> > Index: dev/pci/drm/include/linux/vmalloc.h
> > ===================================================================
> > RCS file: /cvs/src/sys/dev/pci/drm/include/linux/vmalloc.h,v
> > retrieving revision 1.1
> > diff -u -p -r1.1 vmalloc.h
> > --- dev/pci/drm/include/linux/vmalloc.h 14 Apr 2019 10:14:53 -0000
> > 1.1
> > +++ dev/pci/drm/include/linux/vmalloc.h 31 Dec 2020 20:44:38 -0000
> > @@ -25,25 +25,10 @@
> > #include <linux/overflow.h>
> >
> > void *vmap(struct vm_page **, unsigned int, unsigned long, pgprot_t);
> > -void vunmap(void *, size_t);
> > -
> > -static inline void *
> > -vmalloc(unsigned long size)
> > -{
> > - return malloc(size, M_DRM, M_WAITOK | M_CANFAIL);
> > -}
> > -
> > -static inline void *
> > -vzalloc(unsigned long size)
> > -{
> > - return malloc(size, M_DRM, M_WAITOK | M_CANFAIL | M_ZERO);
> > -}
> > -
> > -static inline void
> > -vfree(void *objp)
> > -{
> > - free(objp, M_DRM, 0);
> > -}
> > +void vunmap(void *, size_t);
> >
> > +void *vmalloc(unsigned long);
> > +void *vzalloc(unsigned long);
> > +void vfree(const void *);
> >
> > #endif
> >
>