On Thu, Dec 31, 2020 at 10:09:36PM +0100, Mark Kettenis wrote:
> The diff below changes the emulated Linux memory allocation functions
> a bit such that they only use malloc(9) for allocations smaller than a
> page. This reduces pressure on the "interrupt safe" map and hopefully
> will avoid the
>
> uvm_mapent_alloc: out of static map entries
>
> messages that some people have seen more often in the last few months.
> It also could help with some memory allocation issues seen by
> amdgpu(4) users.
>
> The downside of this approach is that memory leaks will be harder to
> spot as the larger memory allocations are no longer included in the
> DRM type as reported by vmstat -m. Another downside is that this no
> longer caps the amount of kernel memory that can be allocated by
> drm(4). If that turns out to be a problem, we can impose a limit on
> the amount of memory that can be allocated this way.
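If a cap does become necessary, a simple (untested) sketch could be a byte
counter checked before the km_alloc() call, mirroring the vmalloc() in the
diff below; VMALLOC_LIMIT and vmalloc_bytes are made-up names, not part of
the diff:

	#define VMALLOC_LIMIT	(256UL * 1024 * 1024)	/* arbitrary example cap */
	unsigned long vmalloc_bytes;

	void *
	vmalloc(unsigned long size)
	{
		struct vmalloc_entry *entry;
		void *addr;

		size = roundup(size, PAGE_SIZE);
		/* refuse allocations that would push the total past the cap */
		if (vmalloc_bytes + size > VMALLOC_LIMIT)
			return NULL;
		addr = km_alloc(size, &kv_any, &kp_dirty, &kd_waitok);
		if (addr) {
			vmalloc_bytes += size;
			entry = pool_get(&vmalloc_pool, PR_WAITOK);
			entry->addr = addr;
			entry->size = size;
			RBT_INSERT(vmalloc_tree, &vmalloc_tree, entry);
		}

		return addr;
	}

with vfree() subtracting entry->size again.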
>
> The implementation needs to keep track of the size of each allocated
> memory block. This is done using a red-black tree. Our kernel uses
> red-black trees in similar situations but I wouldn't call myself an
> expert in the area of data structures so there may be a better
> approach.
>
> Some real-life testing would be appreciated.
>
Thanks for looking into this; I'm interested to hear how people go with
it. Some comments inline below.
>
> Index: dev/pci/drm/drm_linux.c
> ===================================================================
> RCS file: /cvs/src/sys/dev/pci/drm/drm_linux.c,v
> retrieving revision 1.74
> diff -u -p -r1.74 drm_linux.c
> --- dev/pci/drm/drm_linux.c 31 Dec 2020 06:31:55 -0000 1.74
> +++ dev/pci/drm/drm_linux.c 31 Dec 2020 20:44:38 -0000
> @@ -430,6 +430,116 @@ dmi_check_system(const struct dmi_system
> return (num);
> }
>
> +struct vmalloc_entry {
> + const void *addr;
> + size_t size;
> + RBT_ENTRY(vmalloc_entry) vmalloc_node;
> +};
> +
> +struct pool vmalloc_pool;
> +RBT_HEAD(vmalloc_tree, vmalloc_entry) vmalloc_tree;
> +
> +RBT_PROTOTYPE(vmalloc_tree, vmalloc_entry, vmalloc_node, vmalloc_compare);
> +
> +static inline int
> +vmalloc_compare(const struct vmalloc_entry *a, const struct vmalloc_entry *b)
> +{
> + vaddr_t va = (vaddr_t)a->addr;
> + vaddr_t vb = (vaddr_t)b->addr;
> +
> + return va < vb ? -1 : va > vb;
> +}
> +
> +RBT_GENERATE(vmalloc_tree, vmalloc_entry, vmalloc_node, vmalloc_compare);
> +
> +bool
> +is_vmalloc_addr(const void *addr)
> +{
> + struct vmalloc_entry key;
> + struct vmalloc_entry *entry;
> +
> + key.addr = addr;
> + entry = RBT_FIND(vmalloc_tree, &vmalloc_tree, &key);
> + return (entry != NULL);
> +}
> +
> +void *
> +vmalloc(unsigned long size)
> +{
> + struct vmalloc_entry *entry;
> + void *addr;
> +
> + size = roundup(size, PAGE_SIZE);
this could just be round_page(size)
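i.e. the rounding could simply be:

	size = round_page(size);

round_page() already rounds up to the next multiple of PAGE_SIZE.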
> + addr = km_alloc(size, &kv_any, &kp_dirty, &kd_waitok);
> + if (addr) {
> + entry = pool_get(&vmalloc_pool, PR_WAITOK);
> + entry->addr = addr;
> + entry->size = size;
> + RBT_INSERT(vmalloc_tree, &vmalloc_tree, entry);
> + }
> +
> + return addr;
> +}
> +
> +void *
> +vzalloc(unsigned long size)
> +{
> + struct vmalloc_entry *entry;
> + void *addr;
> +
> + size = roundup(size, PAGE_SIZE);
> + addr = km_alloc(size, &kv_any, &kp_zero, &kd_waitok);
> + if (addr) {
> + entry = pool_get(&vmalloc_pool, PR_WAITOK);
> + entry->addr = addr;
> + entry->size = size;
> + RBT_INSERT(vmalloc_tree, &vmalloc_tree, entry);
> + }
> +
> + return addr;
> +}
> +
> +void
> +vfree(const void *addr)
> +{
> + struct vmalloc_entry key;
> + struct vmalloc_entry *entry;
> +
> + key.addr = addr;
> + entry = RBT_FIND(vmalloc_tree, &vmalloc_tree, &key);
> + if (entry == NULL)
> + panic("%s: non vmalloced addr %p", __func__, addr);
> +
> + RBT_REMOVE(vmalloc_tree, &vmalloc_tree, entry);
> + km_free((void *)addr, entry->size, &kv_any, &kp_dirty);
> + pool_put(&vmalloc_pool, entry);
> +}
> +
> +void *
> +kvmalloc(size_t size, gfp_t flags)
> +{
> + if (flags != GFP_KERNEL || size < PAGE_SIZE)
> + return malloc(size, M_DRM, flags);
> + return vmalloc(size);
> +}
> +
> +void *
> +kvzalloc(size_t size, gfp_t flags)
> +{
> + if (flags != GFP_KERNEL || size < PAGE_SIZE)
> + return malloc(size, M_DRM, flags | M_ZERO);
> + return vzalloc(size);
> +}
These checks should be (flags & GFP_KERNEL) != GFP_KERNEL to handle calls like
ttm->pages = kvmalloc_array(ttm->num_pages, sizeof(void*),
GFP_KERNEL | __GFP_ZERO);
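i.e. something like this (untested, only the condition changed from the diff):

	void *
	kvmalloc(size_t size, gfp_t flags)
	{
		/* only take the vmalloc() path when all GFP_KERNEL bits are set */
		if ((flags & GFP_KERNEL) != GFP_KERNEL || size < PAGE_SIZE)
			return malloc(size, M_DRM, flags);
		return vmalloc(size);
	}

kvzalloc() would want the same check.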
> +
> +void
> +kvfree(const void *addr)
> +{
> + if (is_vmalloc_addr(addr))
> + vfree(addr);
> + else
> + free((void *)addr, M_DRM, 0);
> +}
> +
> struct vm_page *
> alloc_pages(unsigned int gfp_mask, unsigned int order)
> {
> @@ -1939,6 +2049,10 @@ drm_linux_init(void)
>
> pool_init(&idr_pool, sizeof(struct idr_entry), 0, IPL_TTY, 0,
> "idrpl", NULL);
> +
> + pool_init(&vmalloc_pool, sizeof(struct vmalloc_entry), 0, IPL_NONE, 0,
> + "vmallocpl", NULL);
> + RBT_INIT(vmalloc_tree, &vmalloc_tree);
> }
>
> void
> Index: dev/pci/drm/include/linux/mm.h
> ===================================================================
> RCS file: /cvs/src/sys/dev/pci/drm/include/linux/mm.h,v
> retrieving revision 1.6
> diff -u -p -r1.6 mm.h
> --- dev/pci/drm/include/linux/mm.h 7 Dec 2020 09:10:42 -0000 1.6
> +++ dev/pci/drm/include/linux/mm.h 31 Dec 2020 20:44:38 -0000
> @@ -30,38 +30,26 @@
> #define PFN_DOWN(x) ((x) >> PAGE_SHIFT)
> #define PFN_PHYS(x) ((x) << PAGE_SHIFT)
>
> -#define is_vmalloc_addr(ptr) true
> +bool is_vmalloc_addr(const void *);
>
> -static inline void *
> -kvmalloc(size_t size, gfp_t flags)
> -{
> - return malloc(size, M_DRM, flags);
> -}
> +void *kvmalloc(size_t, gfp_t);
> +void *kvzalloc(size_t, gfp_t);
> +void kvfree(const void *);
>
> static inline void *
> -kvmalloc_array(size_t n, size_t size, int flags)
> +kvmalloc_array(size_t n, size_t size, gfp_t flags)
> {
> if (n != 0 && SIZE_MAX / n < size)
> return NULL;
> - return malloc(n * size, M_DRM, flags);
> -}
> -
> -static inline void *
> -kvcalloc(size_t n, size_t size, int flags)
> -{
> - return kvmalloc_array(n, size, flags | M_ZERO);
> + return kvmalloc(n * size, flags);
> }
>
> static inline void *
> -kvzalloc(size_t size, int flags)
> -{
> - return malloc(size, M_DRM, flags | M_ZERO);
> -}
> -
> -static inline void
> -kvfree(const void *objp)
> +kvcalloc(size_t n, size_t size, gfp_t flags)
> {
> - free((void *)objp, M_DRM, 0);
> + if (n != 0 && SIZE_MAX / n < size)
> + return NULL;
> + return kvzalloc(n * size, flags);
> }
>
> static inline long
> Index: dev/pci/drm/include/linux/vmalloc.h
> ===================================================================
> RCS file: /cvs/src/sys/dev/pci/drm/include/linux/vmalloc.h,v
> retrieving revision 1.1
> diff -u -p -r1.1 vmalloc.h
> --- dev/pci/drm/include/linux/vmalloc.h 14 Apr 2019 10:14:53 -0000 1.1
> +++ dev/pci/drm/include/linux/vmalloc.h 31 Dec 2020 20:44:38 -0000
> @@ -25,25 +25,10 @@
> #include <linux/overflow.h>
>
> void *vmap(struct vm_page **, unsigned int, unsigned long, pgprot_t);
> -void vunmap(void *, size_t);
> -
> -static inline void *
> -vmalloc(unsigned long size)
> -{
> - return malloc(size, M_DRM, M_WAITOK | M_CANFAIL);
> -}
> -
> -static inline void *
> -vzalloc(unsigned long size)
> -{
> - return malloc(size, M_DRM, M_WAITOK | M_CANFAIL | M_ZERO);
> -}
> -
> -static inline void
> -vfree(void *objp)
> -{
> - free(objp, M_DRM, 0);
> -}
> +void vunmap(void *, size_t);
>
> +void *vmalloc(unsigned long);
> +void *vzalloc(unsigned long);
> +void vfree(const void *);
>
> #endif
>
>