> Date: Fri, 1 Jan 2021 20:10:45 +1100
> From: Jonathan Gray <[email protected]>
New diff at the end of this mail. Please test this one instead of the
previous one.
> On Thu, Dec 31, 2020 at 10:09:36PM +0100, Mark Kettenis wrote:
> > The diff below changes the emulated Linux memory allocation functions
> > a bit such that they only use malloc(9) for allocations smaller than a
> > page. This reduces pressure on the "interrupt safe" map and hopefully
> > will avoid the
> >
> > uvm_mapent_alloc: out of static map entries
> >
> > messages that some people have seen more often in the last few months.
> > It also could help with some memory allocation issues seen by
> > amdgpu(4) users.
> >
> > The downside of this approach is that memory leaks will be harder to
> > spot as the larger memory allocations are no longer included in the
> > DRM type as reported by vmstat -m. Another downside is that this no
> > longer caps the amount of kernel memory that can be allocated by
> > drm(4). If that turns out to be a problem, we can impose a limit on
> > the amount of memory that can be allocated this way.
> >
> > The implementation needs to keep track of the size of each allocated
> > memory block. This is done using a red-black tree. Our kernel uses
> > red-black trees in similar situations but I wouldn't call myself an
> > expert in the area of data structures, so there may be a better
> > approach.
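To spell out the pattern for anyone reviewing that part: every allocation
inserts an (address, size) node keyed by the returned address, and the free
path looks the node up again to recover the size.  A minimal sketch of that
RBT(9) usage follows, with illustrative names rather than the ones used in
the diff:

/*
 * Sketch only, not part of the diff: the RBT(9) size-tracking pattern.
 * Illustrative names; the real code is in drm_linux.c below.
 */
struct sz_entry {
	const void	*se_addr;	/* key: start address of the mapping */
	size_t		 se_size;	/* rounded-up allocation size */
	RBT_ENTRY(sz_entry) se_node;
};

RBT_HEAD(sz_tree, sz_entry) sz_tree;

static inline int
sz_cmp(const struct sz_entry *a, const struct sz_entry *b)
{
	vaddr_t va = (vaddr_t)a->se_addr;
	vaddr_t vb = (vaddr_t)b->se_addr;

	return va < vb ? -1 : va > vb;	/* strict total order on addresses */
}

RBT_PROTOTYPE(sz_tree, sz_entry, se_node, sz_cmp);
RBT_GENERATE(sz_tree, sz_entry, se_node, sz_cmp);

/*
 * alloc:  entry->se_addr = addr; entry->se_size = size;
 *         RBT_INSERT(sz_tree, &sz_tree, entry);
 * free:   key.se_addr = addr;
 *         entry = RBT_FIND(sz_tree, &sz_tree, &key);
 *         ...use entry->se_size, then RBT_REMOVE() and free the entry.
 */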
> >
> > Some real-life testing would be appreciated.
> >
>
> Thanks for looking into this; I'm interested to hear how people go with
> it. Some comments inline below.
>
> >
> > Index: dev/pci/drm/drm_linux.c
> > ===================================================================
> > RCS file: /cvs/src/sys/dev/pci/drm/drm_linux.c,v
> > retrieving revision 1.74
> > diff -u -p -r1.74 drm_linux.c
> > --- dev/pci/drm/drm_linux.c 31 Dec 2020 06:31:55 -0000 1.74
> > +++ dev/pci/drm/drm_linux.c 31 Dec 2020 20:44:38 -0000
> > @@ -430,6 +430,116 @@ dmi_check_system(const struct dmi_system
> > return (num);
> > }
> >
> > +struct vmalloc_entry {
> > + const void *addr;
> > + size_t size;
> > + RBT_ENTRY(vmalloc_entry) vmalloc_node;
> > +};
> > +
> > +struct pool vmalloc_pool;
> > +RBT_HEAD(vmalloc_tree, vmalloc_entry) vmalloc_tree;
> > +
> > +RBT_PROTOTYPE(vmalloc_tree, vmalloc_entry, vmalloc_node, vmalloc_compare);
> > +
> > +static inline int
> > +vmalloc_compare(const struct vmalloc_entry *a, const struct vmalloc_entry *b)
> > +{
> > + vaddr_t va = (vaddr_t)a->addr;
> > + vaddr_t vb = (vaddr_t)b->addr;
> > +
> > + return va < vb ? -1 : va > vb;
> > +}
> > +
> > +RBT_GENERATE(vmalloc_tree, vmalloc_entry, vmalloc_node, vmalloc_compare);
> > +
> > +bool
> > +is_vmalloc_addr(const void *addr)
> > +{
> > + struct vmalloc_entry key;
> > + struct vmalloc_entry *entry;
> > +
> > + key.addr = addr;
> > + entry = RBT_FIND(vmalloc_tree, &vmalloc_tree, &key);
> > + return (entry != NULL);
> > +}
> > +
> > +void *
> > +vmalloc(unsigned long size)
> > +{
> > + struct vmalloc_entry *entry;
> > + void *addr;
> > +
> > + size = roundup(size, PAGE_SIZE);
>
> this could just be round_page(size)
Sure.
> > + addr = km_alloc(size, &kv_any, &kp_dirty, &kd_waitok);
> > + if (addr) {
> > + entry = pool_get(&vmalloc_pool, PR_WAITOK);
> > + entry->addr = addr;
> > + entry->size = size;
> > + RBT_INSERT(vmalloc_tree, &vmalloc_tree, entry);
> > + }
> > +
> > + return addr;
> > +}
> > +
> > +void *
> > +vzalloc(unsigned long size)
> > +{
> > + struct vmalloc_entry *entry;
> > + void *addr;
> > +
> > + size = roundup(size, PAGE_SIZE);
> > + addr = km_alloc(size, &kv_any, &kp_zero, &kd_waitok);
> > + if (addr) {
> > + entry = pool_get(&vmalloc_pool, PR_WAITOK);
> > + entry->addr = addr;
> > + entry->size = size;
> > + RBT_INSERT(vmalloc_tree, &vmalloc_tree, entry);
> > + }
> > +
> > + return addr;
> > +}
> > +
> > +void
> > +vfree(const void *addr)
> > +{
> > + struct vmalloc_entry key;
> > + struct vmalloc_entry *entry;
> > +
> > + key.addr = addr;
> > + entry = RBT_FIND(vmalloc_tree, &vmalloc_tree, &key);
> > + if (entry == NULL)
> > + panic("%s: non vmalloced addr %p", __func__, addr);
> > +
> > + RBT_REMOVE(vmalloc_tree, &vmalloc_tree, entry);
> > + km_free((void *)addr, entry->size, &kv_any, &kp_dirty);
> > + pool_put(&vmalloc_pool, entry);
> > +}
> > +
> > +void *
> > +kvmalloc(size_t size, gfp_t flags)
> > +{
> > + if (flags != GFP_KERNEL || size < PAGE_SIZE)
> > + return malloc(size, M_DRM, flags);
> > + return vmalloc(size);
> > +}
> > +
> > +void *
> > +kvzalloc(size_t size, gfp_t flags)
> > +{
> > + if (flags != GFP_KERNEL || size < PAGE_SIZE)
> > + return malloc(size, M_DRM, flags | M_ZERO);
> > + return vzalloc(size);
> > +}
>
> these should be (flags & GFP_KERNEL) != GFP_KERNEL to handle calls like
>
> ttm->pages = kvmalloc_array(ttm->num_pages, sizeof(void*),
> GFP_KERNEL | __GFP_ZERO);
Actually that wouldn't work.  But it points out that kvmalloc() needs to
handle __GFP_ZERO properly, so there is a slightly different approach
down below: if the flags include M_NOWAIT, or if the size is less than
a page, go down the malloc path.  Otherwise call vmalloc() or vzalloc()
based on the M_ZERO flag.  kvzalloc() becomes a simple wrapper around
kvmalloc() that sets the M_ZERO flag.
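
To make the effect on the call Jonathan quoted concrete, this is how the
GFP_KERNEL | __GFP_ZERO case gets dispatched with the reworked kvmalloc().
Sketch only (alloc_page_array() is a made-up helper), and it assumes the
usual compat mappings of GFP_KERNEL to M_WAITOK | M_CANFAIL and __GFP_ZERO
to M_ZERO:

/* Illustration, not part of the diff below. */
static void *
alloc_page_array(unsigned long num_pages)
{
	return kvmalloc_array(num_pages, sizeof(void *),
	    GFP_KERNEL | __GFP_ZERO);
	/*
	 * -> kvmalloc(num_pages * sizeof(void *),
	 *             M_WAITOK | M_CANFAIL | M_ZERO)
	 * size >= PAGE_SIZE and no M_NOWAIT:  vzalloc()  (km_alloc + kp_zero)
	 * size <  PAGE_SIZE:                  malloc(size, M_DRM, flags)
	 * M_ZERO stays set on both paths, so the zeroing is preserved.
	 */
}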
Index: dev/pci/drm/drm_linux.c
===================================================================
RCS file: /cvs/src/sys/dev/pci/drm/drm_linux.c,v
retrieving revision 1.74
diff -u -p -r1.74 drm_linux.c
--- dev/pci/drm/drm_linux.c 31 Dec 2020 06:31:55 -0000 1.74
+++ dev/pci/drm/drm_linux.c 1 Jan 2021 14:27:39 -0000
@@ -430,6 +430,111 @@ dmi_check_system(const struct dmi_system
return (num);
}
+struct vmalloc_entry {
+ const void *addr;
+ size_t size;
+ RBT_ENTRY(vmalloc_entry) vmalloc_node;
+};
+
+struct pool vmalloc_pool;
+RBT_HEAD(vmalloc_tree, vmalloc_entry) vmalloc_tree;
+
+RBT_PROTOTYPE(vmalloc_tree, vmalloc_entry, vmalloc_node, vmalloc_compare);
+
+static inline int
+vmalloc_compare(const struct vmalloc_entry *a, const struct vmalloc_entry *b)
+{
+ vaddr_t va = (vaddr_t)a->addr;
+ vaddr_t vb = (vaddr_t)b->addr;
+
+ return va < vb ? -1 : va > vb;
+}
+
+RBT_GENERATE(vmalloc_tree, vmalloc_entry, vmalloc_node, vmalloc_compare);
+
+bool
+is_vmalloc_addr(const void *addr)
+{
+ struct vmalloc_entry key;
+ struct vmalloc_entry *entry;
+
+ key.addr = addr;
+ entry = RBT_FIND(vmalloc_tree, &vmalloc_tree, &key);
+ return (entry != NULL);
+}
+
+void *
+vmalloc(unsigned long size)
+{
+ struct vmalloc_entry *entry;
+ void *addr;
+
+ size = round_page(size);
+ addr = km_alloc(size, &kv_any, &kp_dirty, &kd_waitok);
+ if (addr) {
+ entry = pool_get(&vmalloc_pool, PR_WAITOK);
+ entry->addr = addr;
+ entry->size = size;
+ RBT_INSERT(vmalloc_tree, &vmalloc_tree, entry);
+ }
+
+ return addr;
+}
+
+void *
+vzalloc(unsigned long size)
+{
+ struct vmalloc_entry *entry;
+ void *addr;
+
+ size = round_page(size);
+ addr = km_alloc(size, &kv_any, &kp_zero, &kd_waitok);
+ if (addr) {
+ entry = pool_get(&vmalloc_pool, PR_WAITOK);
+ entry->addr = addr;
+ entry->size = size;
+ RBT_INSERT(vmalloc_tree, &vmalloc_tree, entry);
+ }
+
+ return addr;
+}
+
+void
+vfree(const void *addr)
+{
+ struct vmalloc_entry key;
+ struct vmalloc_entry *entry;
+
+ key.addr = addr;
+ entry = RBT_FIND(vmalloc_tree, &vmalloc_tree, &key);
+ if (entry == NULL)
+ panic("%s: non vmalloced addr %p", __func__, addr);
+
+ RBT_REMOVE(vmalloc_tree, &vmalloc_tree, entry);
+ km_free((void *)addr, entry->size, &kv_any, &kp_dirty);
+ pool_put(&vmalloc_pool, entry);
+}
+
+void *
+kvmalloc(size_t size, gfp_t flags)
+{
+ if ((flags & M_NOWAIT) || size < PAGE_SIZE)
+ return malloc(size, M_DRM, flags);
+ if (flags & M_ZERO)
+ return vzalloc(size);
+ else
+ return vmalloc(size);
+}
+
+void
+kvfree(const void *addr)
+{
+ if (is_vmalloc_addr(addr))
+ vfree(addr);
+ else
+ free((void *)addr, M_DRM, 0);
+}
+
struct vm_page *
alloc_pages(unsigned int gfp_mask, unsigned int order)
{
@@ -1939,6 +2044,10 @@ drm_linux_init(void)
pool_init(&idr_pool, sizeof(struct idr_entry), 0, IPL_TTY, 0,
"idrpl", NULL);
+
+ pool_init(&vmalloc_pool, sizeof(struct vmalloc_entry), 0, IPL_NONE, 0,
+ "vmallocpl", NULL);
+ RBT_INIT(vmalloc_tree, &vmalloc_tree);
}
void
Index: dev/pci/drm/include/linux/mm.h
===================================================================
RCS file: /cvs/src/sys/dev/pci/drm/include/linux/mm.h,v
retrieving revision 1.6
diff -u -p -r1.6 mm.h
--- dev/pci/drm/include/linux/mm.h 7 Dec 2020 09:10:42 -0000 1.6
+++ dev/pci/drm/include/linux/mm.h 1 Jan 2021 14:27:39 -0000
@@ -30,38 +30,31 @@
#define PFN_DOWN(x) ((x) >> PAGE_SHIFT)
#define PFN_PHYS(x) ((x) << PAGE_SHIFT)
-#define is_vmalloc_addr(ptr) true
+bool is_vmalloc_addr(const void *);
-static inline void *
-kvmalloc(size_t size, gfp_t flags)
-{
- return malloc(size, M_DRM, flags);
-}
+void *kvmalloc(size_t, gfp_t);
+void kvfree(const void *);
static inline void *
-kvmalloc_array(size_t n, size_t size, int flags)
+kvmalloc_array(size_t n, size_t size, gfp_t flags)
{
if (n != 0 && SIZE_MAX / n < size)
return NULL;
- return malloc(n * size, M_DRM, flags);
+ return kvmalloc(n * size, flags);
}
static inline void *
-kvcalloc(size_t n, size_t size, int flags)
+kvcalloc(size_t n, size_t size, gfp_t flags)
{
- return kvmalloc_array(n, size, flags | M_ZERO);
+ if (n != 0 && SIZE_MAX / n < size)
+ return NULL;
+ return kvmalloc(n * size, flags | M_ZERO);
}
static inline void *
-kvzalloc(size_t size, int flags)
-{
- return malloc(size, M_DRM, flags | M_ZERO);
-}
-
-static inline void
-kvfree(const void *objp)
+kvzalloc(size_t size, gfp_t flags)
{
- free((void *)objp, M_DRM, 0);
+ return kvmalloc(size, flags | M_ZERO);
}
static inline long
Index: dev/pci/drm/include/linux/vmalloc.h
===================================================================
RCS file: /cvs/src/sys/dev/pci/drm/include/linux/vmalloc.h,v
retrieving revision 1.1
diff -u -p -r1.1 vmalloc.h
--- dev/pci/drm/include/linux/vmalloc.h 14 Apr 2019 10:14:53 -0000 1.1
+++ dev/pci/drm/include/linux/vmalloc.h 1 Jan 2021 14:27:39 -0000
@@ -25,25 +25,10 @@
#include <linux/overflow.h>
void *vmap(struct vm_page **, unsigned int, unsigned long, pgprot_t);
-void vunmap(void *, size_t);
-
-static inline void *
-vmalloc(unsigned long size)
-{
- return malloc(size, M_DRM, M_WAITOK | M_CANFAIL);
-}
-
-static inline void *
-vzalloc(unsigned long size)
-{
- return malloc(size, M_DRM, M_WAITOK | M_CANFAIL | M_ZERO);
-}
-
-static inline void
-vfree(void *objp)
-{
- free(objp, M_DRM, 0);
-}
+void vunmap(void *, size_t);
+void *vmalloc(unsigned long);
+void *vzalloc(unsigned long);
+void vfree(const void *);
#endif